o
    hC                     @   s   d dl Z d dlZd dlZd dlZd dlZd dlZd dlm  m	Z	 d dl
m  m  mZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZmZmZmZmZ ejjejjgZdZejdd	d
d Z G dd dZ!dS )    N)Trainer)pretrain)utils)TEST_MODELS_DIR)
train_filedev_file	test_fileDATASET	SENTENCES   module)scopec           
      C   s   t tdd tD }|dd }| d}|d }|d }tjt|tf}t|dd	d
.}t	||D ] \}}|
| |
d |
ddd |D  |
d q5W d   n1 s`w   Y  tt|t|}	|	  tj|s{J |S )zR
    will return a path to a fake embeddings file with the words in SENTENCES
    c                 S   s   g | ]}|D ]}|  qqS  )lower).0yxr   r   c/var/www/html/env_mimamsha/lib/python3.10/site-packages/stanza/tests/classifiers/test_classifier.py
<listcomp>   s    z#fake_embeddings.<locals>.<listcomp>Ndatazembedding.txtzembedding.ptwzutf-8)encoding	c                 s   s    | ]}t |V  qd S N)strr   r   r   r   r   	<genexpr>(   s    z"fake_embeddings.<locals>.<genexpr>
)sortedsetr
   mktempnprandomlenEMB_DIMopenzipwritejoinr   Pretrainr   loadospathexists)
tmp_path_factorywordsembedding_dirembedding_txtembedding_pt	embeddingfoutwordembptr   r   r   fake_embeddings   s$   


r9   c                   @   s   e Zd ZdddZdddZdd Zdd	 Zd
d Zdd Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd ZdS )TestClassifierNc                 C   s   t |d }d}d|d|dt |dddd	d
t |dt |ddddg}	|dur*|	| }	t|	}	t|	j|	j|	j}
|rDtj	||	dd}nt
|	|
}||
|	fS )zD
        Build a model to be used by one of the later tests
        
classifierzmodel.ptz
--save_dirz--save_namez--wordvec_pretrain_file--filter_channels20z--fc_shapesz20,10z--train_filez
--dev_file--max_epochs2z--batch_size60NT)load_optimizer)r   r;   
parse_argsr   read_datasetr   wordvec_typemin_train_lenr   r+   build_new_model)selftmp_pathr9   r   r   
extra_argscheckpoint_filesave_dir	save_nameargs	train_settrainerr   r   r   build_model1   s(   	

zTestClassifier.build_modelc              	   C   s   |  ||||||\}}}	t|	j|	j|	j}
t|}tj	|	j
|	j}|du r2t|	j
||	j}t||||	||
| |||fS )z5
        Iterate a couple times over a model
        N)rP   r   rC   r   rD   rE   dataset_labelsr,   r-   r)   rK   rL   r   checkpoint_namecheckpoint_save_namer;   train_model)rG   rH   r9   r   r   rI   rJ   rO   rN   rM   dev_setlabelssave_filenamer   r   r   run_trainingJ   s   

zTestClassifier.run_trainingc                 C   s   | j ||||ddgd dS )z8
        Test that building a basic model works
        --bilstm_hidden_dimr=   rI   N)rP   rG   rH   r9   r   r   r   r   r   test_build_modelX   s   zTestClassifier.test_build_modelc           	      C   sh   | j ||||ddgd\}}}tj|j|j}|| |j|_t	|j|}||_t	|j|}dS )z9
        Test that a basic model can save & load
        rY   r=   rZ   N)
rP   r,   r-   r)   rK   rL   save	load_namer   r+   )	rG   rH   r9   r   r   rO   _rM   rW   r   r   r   test_save_load^   s   
zTestClassifier.test_save_loadc                 C   s   | j ||||ddgd d S )NrY   r=   rZ   rX   r[   r   r   r   test_train_basicl   s   zTestClassifier.test_train_basicc                 C   s6   g d}|  ||||| dg}|  ||||| dS )zE
        Test w/ and w/o bilstm variations of the classifier
        )z--bilstmrY   r=   --no_bilstmNra   rG   rH   r9   r   r   rM   r   r   r   test_train_bilstmo   s   z TestClassifier.test_train_bilstmc                 C   sR   g d}|  ||||| g d}|  ||||| g d}|  ||||| dS )z
        Test various maxpool widths

        Also sets --filter_channels to a multiple of 2 but not of 3 for
        the test to make sure the math is done correctly on a non-divisible width
        )--maxpool_width1r<   r=   rY   r=   )rf   r?   r<   r=   rY   r=   )rf   3r<   r=   rY   r=   Nra   rd   r   r   r   test_train_maxpool_widthy   s   z'TestClassifier.test_train_maxpool_widthc                 C   sR   g d}|  ||||| g d}|  ||||| g d}|  ||||| d S )N)--filter_sizesz(3,4,5)r<   r=   rY   r=   )rj   z((3,2),)r<   r=   rY   r=   )rj   	((3,2),3)r<   r=   rY   r=   ra   rd   r   r   r   test_train_conv_2d   s   z!TestClassifier.test_train_conv_2dc                 C   sd   g d}|  |||||\}}}|jjdksJ g d}|  |||||\}}}|jjdks0J d S )N)rj   rk   r<   r=   rc   (   )rj   rk   r<   z15,20rc   2   )rX   modelfc_input_size)rG   rH   r9   r   r   rM   rO   r_   r   r   r   test_train_filter_channels   s   z)TestClassifier.test_train_filter_channelsc           
   	   C   s   d}| j ||||ddd|gd\}}}tj|sJ tj|dd dd	}	|	d
 d d r/J tdd |	d
 d  D r@J dS )zx
        Test on a tiny Bert WITHOUT finetuning, which hopefully does not take up too much disk space or memory
        hf-internal-testing/tiny-bertrY   r=   --bert_modelrZ   c                 S      | S r   r   storagelocr   r   r   <lambda>       z0TestClassifier.test_train_bert.<locals>.<lambda>Tweights_onlyparamsconfigforce_bert_savedc                 s       | ]}| d V  qdS 
bert_modelN
startswithr   r   r   r   r          z1TestClassifier.test_train_bert.<locals>.<genexpr>ro   NrX   r,   r-   r.   torchr+   anykeys
rG   rH   r9   r   r   r   rO   rW   r_   saved_modelr   r   r   test_train_bert   s   "&zTestClassifier.test_train_bertc           
   
   C   s   d}| j ||||ddd|dgd\}}}tj|sJ tj|dd d	d
}	|	d d d s0J tdd |	d d  D sAJ dS )zu
        Test on a tiny Bert WITH finetuning, which hopefully does not take up too much disk space or memory
        rr   rY   r=   rs   --bert_finetunerZ   c                 S   rt   r   r   ru   r   r   r   rx      ry   z3TestClassifier.test_finetune_bert.<locals>.<lambda>Trz   r|   r}   r~   c                 s   r   r   r   r   r   r   r   r      r   z4TestClassifier.test_finetune_bert.<locals>.<genexpr>ro   Nr   r   r   r   r   test_finetune_bert   s   $&z!TestClassifier.test_finetune_bertc                    s  d}| j ||||ddd|ddddgd	\}}}tj|sJ tj|d
 }	ttj|	dtdks9J d
 tj	dd ddttj|	d}
t|
dksYJ |
d
 }
tj	|
dd ddt
dD ]5fddd d  D }t|d
ksJ tfdd|D sJ tfdd|D rJ qkdvsJ dd< t|
 | j ||||ddd|ddddddg
|d\}}}ttj|	d}t|dksJ |
|d
 ksJ tj	|
d d dddv sJ ttj|	d!}t|dksJ tj	|d
 d"d dd t
dD ]9fd#d d d  D }t|d
ks0J t fd$d|D s?J t fd%d|D rNJ qd&S )'a{  Test on a tiny Bert WITH finetuning, which hopefully does not take up too much disk space or memory, using 2 layers

        As an added bonus (or eager test), load the finished model and continue
        training from there.  Then check that the initial model and
        the middle model are different, then that the middle model and
        final model are different

        rr   rY   r=   rs   r   z--bert_hidden_layersr?   --save_intermediate_modelsrZ   r   *E0000*   c                 S   rt   r   r   ru   r   r   r   rx      ry   z:TestClassifier.test_finetune_bert_layers.<locals>.<lambda>Trz   *E0002*c                 S   rt   r   r   ru   r   r   r   rx      ry      c                    &   g | ]}| d rd  |v r|qS r   z	layer.%d.r   r   	layer_idxr   r   r         & z<TestClassifier.test_finetune_bert_layers.<locals>.<listcomp>r|   ro   c                 3   0    | ]}| d  d v o|d  d v V  qdS r|   ro   Nr   r   initial_modelsecond_modelr   r   r         . z;TestClassifier.test_finetune_bert_layers.<locals>.<genexpr>c                 3   8    | ]}t  d  d |d  d |V  qdS r   r   allclosegetr   r   r   r   r         6 asdfi  r>   5rI   rJ   c                 S   rt   r   r   ru   r   r   r   rx      ry   *E0005*c                 S   rt   r   r   ru   r   r   r   rx      ry   c                    r   r   r   r   r   r   r   r      r   c                 3   r   r   r   r   final_modelr   r   r   r      r   c                 3   r   r   r   r   r   r   r   r      r   N)rX   r,   r-   r.   splitglobr)   r$   r   r+   ranger   allr]   )rG   rH   r9   r   r   r   rO   rW   rJ   	save_pathsecond_model_file
bert_namessecond_model_file_redofifth_model_filer   )r   r   r   r   r   test_finetune_bert_layers   sF   	*0"z(TestClassifier.test_finetune_bert_layersc                 C   sV  d}| j ||||ddd|ddddgd	\}}}tj|sJ tj|d
d dd}	|	d d d |ks5J |	d d d s?J |	d d d sIJ |	d d d rSJ |	d d d r]J t|	d d dksiJ tdd |	d d D sxJ tdd |	d d D sJ tdd |	d d  D rJ t	j
ddtd|t|d}
|
d }dS )!z:
        Test on a tiny Bert with PEFT finetuning
        rr   rY   r=   rs   r   
--use_peft--lora_modules_to_savepoolerrZ   c                 S   rt   r   r   ru   r   r   r   rx      ry   z3TestClassifier.test_finetune_peft.<locals>.<lambda>Trz   r|   r}   r   r~   use_pefthas_charlm_forwardhas_charlm_backward	bert_lorar   c                 s       | ]
}| d dkV  qdS )z.pooler.r   Nfindr   r   r   r   r         z4TestClassifier.test_finetune_peft.<locals>.<genexpr>c                 s   r   	.encoder.r   Nr   r   r   r   r   r     r   c                 s   r   r   r   r   r   r   r   r     r   ro   enNztokenize,sentiment)download_method	model_dir
processorssentiment_model_pathsentiment_pretrain_pathzThis is a test)rX   r,   r-   r.   r   r+   r$   r   r   stanzaPipeliner   r   )rG   rH   r9   r   r   r   rO   rW   r_   r   pipelinedocr   r   r   test_finetune_peft   s   *"z!TestClassifier.test_finetune_peftc                    s  d}| j ||||ddd|ddddd	g	d
\}}}tj|s J tj|dd dd}	tdd |	d d D s9J | j ||||ddd|ddddd	ddg|d\}}}tj|d }
ttj	|
d}t
|dkslJ |d }tj|dd dd}ttj	|
d}t
|dksJ |d }tj|dd dd}ttj	|
d}t
|dksJ |d }tj|dd dd}|d d }|d d }|d d }dD ]dD ]{ t fd d!|D }t fd"d!|D }t fd#d!|D }t
|dksJ t
|t
|ksJ t
|t
|ksJ t||D ]0\}}||s(J d$krKt||||r<J t||||rKJ qqqd%S )&z[
        Test that if we restart training on a peft model, the peft weights change
        rr   rY   r=   rs   r   r   r   r   r   rZ   c                 S   rt   r   r   ru   r   r   r   rx     ry   z;TestClassifier.test_finetune_peft_restart.<locals>.<lambda>Trz   c                 s   r   r   r   r   r   r   r   r     r   z<TestClassifier.test_finetune_peft_restart.<locals>.<genexpr>r|   r   r>   r   r   r   r   r   c                 S   rt   r   r   ru   r   r   r   rx     ry   r   c                 S   rt   r   r   ru   r   r   r   rx   $  ry   r   c                 S   rt   r   r   ru   r   r   r   rx   )  ry   )_A.z_B.)z.0.z.1.c                    :   g | ]}| d dkr| dkr|  dkr|qS r   r   r   r   layersider   r   r   2     : z=TestClassifier.test_finetune_peft_restart.<locals>.<listcomp>c                    r   r   r   r   r   r   r   r   3  r   c                    s:   g | ]}| d dkr|dkr| dkr|qS )zencoder.r   )r   r   r   r   r   r   r   4  r   r   N)rX   r,   r-   r.   r   r+   r   r   r   r)   r$   r   r'   endswithr   r   )rG   rH   r9   r   r   r   rO   	save_filerJ   r   r   initial_model_filer   r   r   final_model_filer   initial_lorasecond_lora
final_lorainitial_paramssecond_paramsfinal_paramsr   r   r   r   r   test_finetune_peft_restart  sN   ,2
z)TestClassifier.test_finetune_peft_restart)NN)__name__
__module____qualname__rP   rX   r\   r`   rb   re   ri   rl   rq   r   r   r   r   r   r   r   r   r   r:   0   s    




8r:   )"r   r,   pytestnumpyr"   r   r   stanza.models.classifiermodelsr;   stanza.models.classifiers.dataclassifiersr   !stanza.models.classifiers.trainerr   stanza.models.commonr   r   stanza.testsr   "stanza.tests.classifiers.test_datar   r   r   r	   r
   markr   travis
pytestmarkr%   fixturer9   r:   r   r   r   r   <module>   s$    

