o
    h                     @   s   d Z ddlmZ ddlZddlZddlZddlZddlZddlm	Z	 ddl
mZ ddlmZ ejjejjgZdZdZG d	d
 d
ZdS )z
Currently tests a few configurations of files for creating a charlm vocab

Also has a skeleton test of loading & saving a charlm
    )CounterN)charlm)
char_model)TEST_MODELS_DIRz+
Unban mox opal!
I hate watching Peppa Pig
z
This is plastic cheese
c                   @   st   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Ze	j
dddd Ze	j
dddd Zdd Zdd ZdS )TestCharModelc              	   C   s   t  0}tj|d}t|ddd}|t W d    n1 s#w   Y  t	|}W d    n1 s7w   Y  tD ]}||v sFJ q>d|vsMJ d S Nztext.txtwutf-8encodingQ
tempfileTemporaryDirectoryospathjoinopenwritefake_text_1r   build_charlm_vocabselftempdirsample_filefoutvocabi r   ^/var/www/html/env_mimamsha/lib/python3.10/site-packages/stanza/tests/common/test_char_model.pytest_single_file_vocab      
z$TestCharModel.test_single_file_vocabc              	   C   s   t  1}tj|d}tj|ddd}|t W d    n1 s$w   Y  t	
|}W d    n1 s8w   Y  tD ]}||v sGJ q?d|vsNJ d S )Nztext.txt.xzwtr	   r
   r   )r   r   r   r   r   lzmar   r   r   r   r   r   r   r   r   test_single_file_xz_vocab*   s   
z'TestCharModel.test_single_file_xz_vocabc              	   C   s   t  0}tj|d}t|ddd}|t W d    n1 s#w   Y  t	|}W d    n1 s7w   Y  tD ]}||v sFJ q>d|vsMJ d S r   r   r   r   r   r   test_single_file_dir_vocab5   r!   z(TestCharModel.test_single_file_dir_vocabc              	   C   s   t  T}tj|d}t|ddd}|t W d    n1 s#w   Y  tj|d}tj|ddd}|t	 W d    n1 sGw   Y  t
|}W d    n1 s[w   Y  tD ]}||v sjJ qbt	D ]}||v suJ qmd|vs|J d S )Nt1.txtr   r	   r
   	t2.txt.xzr"   r   )r   r   r   r   r   r   r   r   r#   fake_text_2r   r   r   r   r   r   test_multiple_files_vocab@   s    
	z'TestCharModel.test_multiple_files_vocabc              	   C   s  t  V}tj|d}t|ddd}|t W d    n1 s#w   Y  tj|d}tj|ddd}|t	 W d    n1 sGw   Y  t
j|dd}W d    n1 s]w   Y  tttt	 }| D ]\}}|dk r}||vs|J qn||v sJ qnd S )	Nr&   r   r	   r
   r'   r"      )cutoff)r   r   r   r   r   r   r   r   r#   r(   r   r   r   most_common)r   r   r   r   r   countslettercountr   r   r   test_cutoff_vocabP   s"   

zTestCharModel.test_cutoff_vocabc                 C   s8  t  }tj|d}t|ddd}|t W d   n1 s$w   Y  tj|d}t|ddd#}tdD ]}|t |d |t	 |d q<W d   n1 s]w   Y  d	}d
}d}d|d|ddddddddt
t ddd|d|d|d|g}	t|	}	t|	 tjtj||sJ tjtj||sJ tjtj||}
tjtj||sJ tjtj||}
tj|	tj||}|jdksJ |jdksJ t||	 ttj||}tj|	|}t||	|	d ks	J W d   dS 1 sw   Y  dS )zQ
        Test the whole thing on a small dataset for an iteration or two
        zen_test.dev.txtr   r	   r
   Nzen_test.train.txti  
zen_test.forward.ptzen_text.vocab.ptzen_text.checkpoint.ptz--train_filez--eval_filez--eval_steps0z--epochs2z--cutoff1z--batch_sizez%dz--shorthanden_testz
--save_dirz--save_namez--vocab_save_namez--checkpoint_save_namer   r*   lr0)r   r   r   r   r   r   r   r   ranger(   lenr   
parse_argstrainexistsr   CharacterLanguageModelloadCharacterLanguageModelTrainerglobal_stepepochget_current_lrload_char_vocabfrom_new_model)r   r   	eval_filer   
train_filer   	save_namevocab_save_namecheckpoint_save_nameargsmodeltrainerr   r   r   r   test_build_modelb   sX   




$zTestCharModel.test_build_modelclass)scopec                 C   @   t jtddd}t|}t|dksJ |d }tj|S )Nenforward_charlm*   r   	r   r   r   r   globr8   r   r<   r=   r   models_pathmodels
model_filer   r   r   english_forward   
   
zTestCharModel.english_forwardc                 C   rO   )NrP   backward_charlmrR   rS   r   rT   rV   r   r   r   english_backward   r[   zTestCharModel.english_backwardc                 C   s   |j sJ |j r
J dS )z9
        Check that basic loading functions work
        N)is_forward_lm)r   rZ   r]   r   r   r   test_load_model   s   
zTestCharModel.test_load_modelc                 C   sr   t  +}||fD ]}tj|dd}|| tj|}|j	|j	ks&J q	W d   dS 1 s2w   Y  dS )z,
        Load, save, and load again
        resavedz	charlm.ptN)
r   r   r   r   r   saver   r<   r=   r^   )r   rZ   r]   r   rJ   	save_filereloadedr   r   r   test_save_load_model   s   

"z"TestCharModel.test_save_load_modelN)__name__
__module____qualname__r    r$   r%   r)   r0   rL   pytestfixturerZ   r]   r_   rd   r   r   r   r   r      s    
8

	
	r   )__doc__collectionsr   rU   r#   r   r   rh   stanza.modelsr   stanza.models.commonr   stanza.testsr   marktravispipeline
pytestmarkr   r(   r   r   r   r   r   <module>   s    