o
    h%                     @   s~   d Z ddlZddlZddlZddlmZ ddlmZ ddlm	Z	 ddl
mZ ejjejjgZd Zd ZG d	d
 d
ZdS )zz
Run the tagger for a couple iterations on some fake data

Uses a couple sentences of UD_English-EWT as training/dev data
    N)parser)pretrain)TrainerTEST_WORKING_DIRa  
# sent_id = weblog-juancole.com_juancole_20051126063000_ENG_20051126_063000-0003
# text = DPA: Iraqi authorities announced that they had busted up 3 terrorist cells operating in Baghdad.
1	DPA	DPA	PROPN	NNP	Number=Sing	0	root	0:root	SpaceAfter=No
2	:	:	PUNCT	:	_	1	punct	1:punct	_
3	Iraqi	Iraqi	ADJ	JJ	Degree=Pos	4	amod	4:amod	_
4	authorities	authority	NOUN	NNS	Number=Plur	5	nsubj	5:nsubj	_
5	announced	announce	VERB	VBD	Mood=Ind|Number=Plur|Person=3|Tense=Past|VerbForm=Fin	1	parataxis	1:parataxis	_
6	that	that	SCONJ	IN	_	9	mark	9:mark	_
7	they	they	PRON	PRP	Case=Nom|Number=Plur|Person=3|PronType=Prs	9	nsubj	9:nsubj	_
8	had	have	AUX	VBD	Mood=Ind|Number=Plur|Person=3|Tense=Past|VerbForm=Fin	9	aux	9:aux	_
9	busted	bust	VERB	VBN	Tense=Past|VerbForm=Part	5	ccomp	5:ccomp	_
10	up	up	ADP	RP	_	9	compound:prt	9:compound:prt	_
11	3	3	NUM	CD	NumForm=Digit|NumType=Card	13	nummod	13:nummod	_
12	terrorist	terrorist	ADJ	JJ	Degree=Pos	13	amod	13:amod	_
13	cells	cell	NOUN	NNS	Number=Plur	9	obj	9:obj	_
14	operating	operate	VERB	VBG	VerbForm=Ger	13	acl	13:acl	_
15	in	in	ADP	IN	_	16	case	16:case	_
16	Baghdad	Baghdad	PROPN	NNP	Number=Sing	14	obl	14:obl:in	SpaceAfter=No
17	.	.	PUNCT	.	_	1	punct	1:punct	_

# sent_id = weblog-juancole.com_juancole_20051126063000_ENG_20051126_063000-0004
# text = Two of them were being run by 2 officials of the Ministry of the Interior!
1	Two	two	NUM	CD	NumForm=Word|NumType=Card	6	nsubj:pass	6:nsubj:pass	_
2	of	of	ADP	IN	_	3	case	3:case	_
3	them	they	PRON	PRP	Case=Acc|Number=Plur|Person=3|PronType=Prs	1	nmod	1:nmod:of	_
4	were	be	AUX	VBD	Mood=Ind|Number=Plur|Person=3|Tense=Past|VerbForm=Fin	6	aux	6:aux	_
5	being	be	AUX	VBG	VerbForm=Ger	6	aux:pass	6:aux:pass	_
6	run	run	VERB	VBN	Tense=Past|VerbForm=Part|Voice=Pass	0	root	0:root	_
7	by	by	ADP	IN	_	9	case	9:case	_
8	2	2	NUM	CD	NumForm=Digit|NumType=Card	9	nummod	9:nummod	_
9	officials	official	NOUN	NNS	Number=Plur	6	obl	6:obl:by	_
10	of	of	ADP	IN	_	12	case	12:case	_
11	the	the	DET	DT	Definite=Def|PronType=Art	12	det	12:det	_
12	Ministry	Ministry	PROPN	NNP	Number=Sing	9	nmod	9:nmod:of	_
13	of	of	ADP	IN	_	15	case	15:case	_
14	the	the	DET	DT	Definite=Def|PronType=Art	15	det	15:det	_
15	Interior	Interior	PROPN	NNP	Number=Sing	12	nmod	12:nmod:of	SpaceAfter=No
16	!	!	PUNCT	.	_	6	punct	6:punct	_

a}  
1	From	from	ADP	IN	_	3	case	3:case	_
2	the	the	DET	DT	Definite=Def|PronType=Art	3	det	3:det	_
3	AP	AP	PROPN	NNP	Number=Sing	4	obl	4:obl:from	_
4	comes	come	VERB	VBZ	Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin	0	root	0:root	_
5	this	this	DET	DT	Number=Sing|PronType=Dem	6	det	6:det	_
6	story	story	NOUN	NN	Number=Sing	4	nsubj	4:nsubj	_
7	:	:	PUNCT	:	_	4	punct	4:punct	_

c                   @   sb   e Zd Zejdddd ZdddZd	d
 Zdd Zdd Z	dd Z
dd Zdd Zdd ZdS )
TestParserclass)scopec                 C   s
   t  dS )Nz/in/tiny_emb.ptr   )self r   \/var/www/html/env_mimamsha/lib/python3.10/site-packages/stanza/tests/depparse/test_parser.pywordvec_pretrain_fileL   s   
z TestParser.wordvec_pretrain_fileFNc                 C   s6  t |d }t |d }t |d }	d}
t ||
 }t|ddd}|| W d   n1 s1w   Y  t|ddd}|| W d   n1 sMw   Y  d	|d
|d|d|	dddddddddt |d|
ddddddg}|sy|ddg |dur|| }t|}tj|sJ t	
|}t||d}|S )zP
        Run the training for a few iterations, load & return the model
        ztrain.conlluz
dev.conlluzpred.conlluztest_parser.ptwzutf-8)encodingNz--wordvec_pretrain_filez--train_filez--eval_filez--output_filez
--log_step10z--eval_interval20z--max_steps100z--shorthanden_testz
--save_dirz--save_namez--bert_start_finetuningz--bert_warmup_stepsz--langenz--augment_nopunctz0.0)r   
model_file)stropenwriteextendr   mainospathexistsr   Pretrainr   )r
   tmp_pathr   
train_textdev_textaugment_nopunct
extra_args
train_filedev_file	pred_file	save_name	save_filefoutargstrainerptsaved_modelr   r   r   run_trainingP   sD   

zTestParser.run_trainingc                 C   s   |  ||tt dS )zB
        Simple test of a few 'epochs' of tagger training
        Nr.   
TRAIN_DATADEV_DATAr
   r   r   r   r   r   
test_train{   s   zTestParser.test_trainc                 C   s   | j ||ttg dd d S )N)--bert_modelhf-internal-testing/tiny-bert--bert_hidden_layers2r#   r/   r2   r   r   r   test_with_bert_nlayers   s   z!TestParser.test_with_bert_nlayersc                 C   @   | j ||ttg dd}d|j v sJ d|j v sJ d S )Nr4   r5   --bert_finetuner6   r7   r8   bert_optimizerbert_schedulerr.   r0   r1   	optimizerkeys	schedulerr
   r   r   r+   r   r   r   test_with_bert_finetuning      z$TestParser.test_with_bert_finetuningc           
      C   s   | j ||ttg dd}d|j v sJ d|j v sJ |jd }|| }tj	|s/J t
j|dd dd	}td
d |d  D sHJ t|}ddi}t|||d}	|	| t
j|dd dd	}tdd |d  D svJ dS )zq
        Check that if we save, then load, then save a model with a finetuned bert, that bert isn't lost
        r;   r8   r=   r>   r'   c                 S      | S Nr   storagelocr   r   r   <lambda>       z>TestParser.test_with_bert_finetuning_resaved.<locals>.<lambda>T)weights_onlyc                 s       | ]}| d V  qdS 
bert_modelN
startswith.0xr   r   r   	<genexpr>       z?TestParser.test_with_bert_finetuning_resaved.<locals>.<genexpr>modelbert_finetuneF)r   r   r*   c                 S   rF   rG   r   rH   r   r   r   rK      rL   c                 s   rN   rO   rQ   rS   r   r   r   rV      rW   N)r.   r0   r1   r@   rA   rB   r*   r   r   r   torchloadanyr   r   r   save)
r
   r   r   r+   r'   filename
checkpointr,   r*   r-   r   r   r   !test_with_bert_finetuning_resaved   s   


"z,TestParser.test_with_bert_finetuning_resavedc                 C   r:   )N)r4   r5   r<   r6   r7   z
--use_peftr8   r=   r>   r?   rC   r   r   r   test_with_peft   rE   zTestParser.test_with_peftc           
      C   s   | j ||ttddgd}|jd }|jd }|jd }tjtj||s(J |d us.J tj|s6J t|j	dks?J |j	
 D ]}t|tjjsOJ qDt|}t|j||d}	|	j	d usdJ t|	j	dksmJ |	j	
 D ]}t|tjjs}J qrd S )	N--optimadamr8   save_dirr'   checkpoint_save_name   r*   r   r   )r.   r0   r1   r*   r   r   r   joinlenr@   values
isinstancerZ   optimAdamr   r   r   
r
   r   r   r+   rd   r'   checkpoint_nameoptr,   r_   r   r   r    test_single_optimizer_checkpoint   "   



z+TestParser.test_single_optimizer_checkpointc           
      C   s   | j ||ttg dd}|jd }|jd }|jd }tjtj||s(J |d us.J tj|s6J t|j	dks?J |j	
 D ]}t|tjjsOJ qDt|}t|j||d}	|	j	d usdJ t|	j	dksmJ |j	
 D ]}t|tjjs}J qrd S )N)rb   rc   z--second_optimsgdz--second_optim_start_step40r8   rd   r'   re   rf   rg   )r.   r0   r1   r*   r   r   r   rh   ri   r@   rj   rk   rZ   rl   SGDr   r   r   rn   r   r   r   test_two_optimizers_checkpoint   rr   z)TestParser.test_two_optimizers_checkpoint)FN)__name__
__module____qualname__pytestfixturer   r.   r3   r9   rD   r`   ra   rq   rv   r   r   r   r   r   K   s    


+r   )__doc__r   rz   rZ   stanza.modelsr   stanza.models.commonr   stanza.models.depparse.trainerr   stanza.testsr   markpipelinetravis
pytestmarklstripr0   r1   r   r   r   r   r   <module>   s     (+	