o
    hi                     @   s   d dl Z d dlZd dlZd dlZd dlZd dlm  m  mZ d dl	T ej
jej
jgZdd Zdd Zdd Zd	d
 Zdd Zdd Zdd Zdd Zdd Zdd ZdS )    N)*c               	   C   s   t jt dd-} tt tj| dd W d   n1 s!w   Y  W d   dS W d   dS 1 s9w   Y  dS )zN
    get_wordvec_file should fail if neither word2vec nor fasttext exists
    /outdiren_foowordvec_dir	shorthandN)tempfileTemporaryDirectoryTEST_WORKING_DIRpytestraisesFileNotFoundErrorutilsget_wordvec_file)temp_dir r   Y/var/www/html/env_mimamsha/lib/python3.10/site-packages/stanza/tests/common/test_utils.pytest_wordvec_not_found   s   "r   c                  C      t jt dd3} tj| dd}t| tj|d}t|d}|  t	j
| dd}||ks4J W d	   d	S 1 s?w   Y  d	S )
z2
    Test searching for word2vec and xz files
    r   r   word2vecEnglishzen.vectors.xzwr   r   Nr
   r   r   ospathjoinmakedirsopencloser   r   )r   word2vec_dir	fake_filefoutfilenamer   r   r   test_word2vec_xz      

"r%   c                  C   r   )
z3
    Test searching for fasttext and txt files
    r   r   fasttextr   en.vectors.txtr   r   r   Nr   )r   fasttext_dirr"   r#   r$   r   r   r   test_fasttext_txt(   r&   r*   c               	   C   s   t jt ddX} tj| dd}t| tj|d}t|d}|  t	j
| ddd}||ks5J tt t	j
| dd	 W d
   n1 sLw   Y  W d
   d
S W d
   d
S 1 sdw   Y  d
S )zN
    If we supply our own wordvec type, get_wordvec_file should find that
    r   r   googler   r(   r   r   )r   r	   wordvec_typer   N)r
   r   r   r   r   r   r   r   r    r   r   r   r   r   )r   
google_dirr"   r#   r$   r   r   r   test_wordvec_type:   s   

"r.   c                  C   sj   g dddgdgg} t j| td\}}|dgddgg dfks!J |dks'J t ||}| |ks3J d S )N)                  )key)r0   r/   r   r   sort_with_indiceslenunsort)dataorderedorig_idxunsortedr   r   r   test_sort_with_indicesP   s   r>   c                  C   sJ   t g \} }t| dksJ t|dksJ t | |}g |ks#J d S )Nr   r6   )r;   r<   r=   r   r   r   test_empty_sort_with_indicesY   s
   r?   c                  C   s  g } t dD ]}| dddt|g q| g d | ddg t| d}|g dks0J t| d	}|g d
ks>J t| dd  d	}|g dksPJ tj| tdd\}}dd |D g dksgJ t|d	}|g dksuJ | t||ksJ d S )Nr3   Unbanmoxopal)
Dozn'tbanUrzaz'sSagathatcardisgreatBanRagavan)r   r/   )r/   r0   )r0   r1   )r1   r2   r2   r3   r3   r4   r4         )r   r0   r0   r2   rN   rO   rP   r/   )rS   rT   rN   rO   T)r5   reversec                 S   s   g | ]}t |qS r   )r8   ).0xr   r   r   
<listcomp>x   s    z+test_split_into_batches.<locals>.<listcomp>)
   r2   r2   r2   r2   r2   r0   )rM   )r/   r1   )r1   r3   )r3   rQ   )rangeappendstrr   split_into_batchesr7   r8   r9   )r:   ibatchesr;   r<   r   r   r   test_split_into_batchesb   s    r`   c                   C   sl   t g dg dg ksJ t g dg ddgksJ t ddgddggddgddggdgks4J d S )N)OPERLOC)ra   rb   rc   ORGrd   ra   rb   rc   )r   find_missing_tagsr   r   r   r   test_find_missing_tags   s   2rf   c            	   	   C   s  d} t  }tj|d}t|d}||  W d   n1 s#w   Y  t|}|	 }| |ks8J W d   n1 sBw   Y  |j
sLJ zt|
}|j
rXJ td1 s_w   Y  W n	 tyn   Y nw |j
stJ tj|d}t|d}||  W d   n1 sw   Y  t|}|	 }| |ksJ W d   n1 sw   Y  |j
sJ zt|
}|j
rJ td1 sw   Y  W n	 ty   Y nw |j
sJ W d   dS 1 sw   Y  dS )z9
    test that we can read either .xz or regular txt
    zthis is a testzfoo.txtr   Nzunban mox opal!z
foo.txt.xzwt)r
   r   r   r   r   r   writer   open_read_textreadclosed
ValueErrorlzma)	TEXTtempdirr$   r#   finin_textfinexfinxzfinexxzr   r   r   test_open_read_text   sN   





"ru   c                  C   sj   t ddd} tj| dksJ t ddd} tj| dks"J t ddd} tj| dks3J dS )	z=
    Test some expected results for the checkpoint names
    saved_modelszkk_oscar_forward_charlm.ptN)rv   z%kk_oscar_forward_charlm_checkpoint.ptkk_oscar_forward_charlm)rv   "kk_oscar_forward_charlm_checkpointothername.pt)rv   ry   )r   checkpoint_namer   r   split)
checkpointr   r   r   test_checkpoint_name   s   r}   )rm   r   r
   r   stanzastanza.models.common.utilsmodelscommonr   stanza.testsmarktravispipeline
pytestmarkr   r%   r*   r.   r>   r?   r`   rf   ru   r}   r   r   r   r   <module>   s$    			 -