o
    hH                     @   s   d Z ddlZddlZddlT ddlmZmZmZ ejj	Z
dZd Zd Zdd	 Zd
d Zdd Zdd Zdd Zdd ZdS )z 
Basic testing of lemmatization
    N)*)TEXTUPOSLEMMAz!Joe Smith was born in California.zG
Joe Joe
Smith Smith
was was
born born
in in
California California
. .
zF
Joe Joe
Smith Smith
was be
born bear
in in
California California
. .
c                  C   sj   t jdi dtddddd i} | t}g }| D ]}||j d|j g7 }qtd|ks3J d S )	Nztokenize,lemmaenT)
processorsdirlanglemma_use_identitydownload_method 
 )	stanzaPipelineTEST_MODELS_DIREN_DOC
iter_wordstextlemmaEN_DOC_IDENTITY_GOLDjoinnlpdocword_lemma_pairswr   r   `/var/www/html/env_mimamsha/lib/python3.10/site-packages/stanza/tests/pipeline/test_lemmatizer.pytest_identity_lemmatizer$   s   "r   c                  C   sh   t jdi dtdddd i} | t}g }| D ]}||j d|j g7 }qtd|ks2J d S )Ntokenize,pos,lemmar   r   r   r	   r   r   r   r   )	r   r   r   r   r   r   r   EN_DOC_LEMMATIZER_MODEL_GOLDr   r   r   r   r   test_full_lemmatizer,   s    r"   c                    sL   t dD ]} d   | jvr!t fdd| j D r!   S qtd)N
   zc                 3   s    | ]	}|d   kV  qdS )r   Nr   ).0xbaser   r   	<genexpr>7   s    z$find_unknown_word.<locals>.<genexpr>zwtf?)range	word_dictallcomposite_dictkeysRuntimeError)
lemmatizerr(   ir   r'   r   find_unknown_word4   s   &r2   c                  C   s  t jdi dtdddd d} | jd j}t|d}t|d}t|d	}| d
| d | d | }|ttt	g}t
|dksDJ |d d |ksNJ |d d |ksXJ |d d |ksbJ |j||d d f |d d ksuJ |j||d d f |d d ksJ |j||d d f |d d ksJ | d
| d | d | }|ttt	g}||ksJ t|d}	t|d}
t|d}| d|	 d |
 d | }|ttt	g}t
|dksJ |d d |	ksJ |d d |
ksJ |d d |ksJ |j|	|d d f |d d ksJ |j|
|d d f |d d ks+J |j||d d f |d d ks?J | d|	 d |
 d | }|ttt	g}||ks\J ||jvsdJ d S )Nr   r   r    T)lemma_store_resultsr   r   abczI found an z in my z.  It was a       r               defz	It was a z.  I found an    r   )r   r   r   r   _trainerr2   getr   r   r   lenr-   r+   )r   r0   azbzczr   stuffdoc2stuff2dzezfzr   r   r   test_store_results;   sB   "


&&&


(((rM   c                  C   sn   t jddtdd} | d}|jd jd jdksJ t jddtdd	d
} | d}|jd jd jdks5J dS )z[
    Test that setting the lemmatizer as caseless at Pipeline time lowercases the text
    r   r   N)r   	model_dirr   zHere is an Excerptr   excerptT)r   rN   r   lemma_caselessExcerpt)r   r   r   	sentenceswordsr   )r   r   r   r   r   test_caseless_lemmatizerk   s   rU   c                  C   s   t jdddtdd} | jd }|jd sJ | d}d	 }t|jd
ks'J t|jd jdks3J t	|jd j|D ]\}}|j
|ksGJ q<dS )z,
    Test the Latin caseless lemmatizer
    laittbr   N)packager   rN   r   r   caselesszQuod Erat Demonstrandumzqui sum demonstror;   r   r8   )r   r   r   r   configsplitrC   rS   rT   zipr   )r   r0   r   expected_lemmaswordexpectedr   r   r   test_latin_caseless_lemmatizerz   s   
r`   )__doc__pytestr   stanza.testsstanza.models.common.docr   r   r   markpipeline
pytestmarkr   stripr   r!   r   r"   r2   rM   rU   r`   r   r   r   r   <module>   s&    
0