o
    h                     @   s   d Z ddlZddlZddlZddlZddlZddlmZ ddlm	Z	 ddl
T ddlmZmZ ejjejjgZejddd	d
 Zdd Zdd Zd Zd ZG dd dZdS )z?
Test a couple basic functions - load & save an existing model
    N)
lemmatizer)trainer)*)choose_lemma_charlmbuild_charlm_argsmodulescopec                  C   s@   t jtddd} t| }t|dksJ |d }tj|dS )Nenlemmar      r   
model_file)ospathjoinTEST_MODELS_DIRgloblenr   Trainer)models_pathmodelsr    r   `/var/www/html/env_mimamsha/lib/python3.10/site-packages/stanza/tests/lemma/test_lemma_trainer.pyenglish_model   s
   
r   c                 C   s   dS )z5
    Does nothing, just tests that loading works
    Nr   )r   r   r   r   test_load_model       r   c                 C   sT   t  }tj|dd}| | tj|d}W d   dS 1 s#w   Y  dS )z$
    Load, save, and load again
    resavedzlemma.ptr   N)tempfileTemporaryDirectoryr   r   r   saver   r   )r   tempdir	save_filereloadedr   r   r   test_save_load_model"   s
   

"r$   a  
# sent_id = weblog-juancole.com_juancole_20051126063000_ENG_20051126_063000-0003
# text = DPA: Iraqi authorities announced that they had busted up 3 terrorist cells operating in Baghdad.
1	DPA	DPA	PROPN	NNP	Number=Sing	0	root	0:root	SpaceAfter=No
2	:	:	PUNCT	:	_	1	punct	1:punct	_
3	Iraqi	Iraqi	ADJ	JJ	Degree=Pos	4	amod	4:amod	_
4	authorities	authority	NOUN	NNS	Number=Plur	5	nsubj	5:nsubj	_
5	announced	announce	VERB	VBD	Mood=Ind|Number=Plur|Person=3|Tense=Past|VerbForm=Fin	1	parataxis	1:parataxis	_
6	that	that	SCONJ	IN	_	9	mark	9:mark	_
7	they	they	PRON	PRP	Case=Nom|Number=Plur|Person=3|PronType=Prs	9	nsubj	9:nsubj	_
8	had	have	AUX	VBD	Mood=Ind|Number=Plur|Person=3|Tense=Past|VerbForm=Fin	9	aux	9:aux	_
9	busted	bust	VERB	VBN	Tense=Past|VerbForm=Part	5	ccomp	5:ccomp	_
10	up	up	ADP	RP	_	9	compound:prt	9:compound:prt	_
11	3	3	NUM	CD	NumForm=Digit|NumType=Card	13	nummod	13:nummod	_
12	terrorist	terrorist	ADJ	JJ	Degree=Pos	13	amod	13:amod	_
13	cells	cell	NOUN	NNS	Number=Plur	9	obj	9:obj	_
14	operating	operate	VERB	VBG	VerbForm=Ger	13	acl	13:acl	_
15	in	in	ADP	IN	_	16	case	16:case	_
16	Baghdad	Baghdad	PROPN	NNP	Number=Sing	14	obl	14:obl:in	SpaceAfter=No
17	.	.	PUNCT	.	_	1	punct	1:punct	_

# sent_id = weblog-juancole.com_juancole_20051126063000_ENG_20051126_063000-0004
# text = Two of them were being run by 2 officials of the Ministry of the Interior!
1	Two	two	NUM	CD	NumForm=Word|NumType=Card	6	nsubj:pass	6:nsubj:pass	_
2	of	of	ADP	IN	_	3	case	3:case	_
3	them	they	PRON	PRP	Case=Acc|Number=Plur|Person=3|PronType=Prs	1	nmod	1:nmod:of	_
4	were	be	AUX	VBD	Mood=Ind|Number=Plur|Person=3|Tense=Past|VerbForm=Fin	6	aux	6:aux	_
5	being	be	AUX	VBG	VerbForm=Ger	6	aux:pass	6:aux:pass	_
6	run	run	VERB	VBN	Tense=Past|VerbForm=Part|Voice=Pass	0	root	0:root	_
7	by	by	ADP	IN	_	9	case	9:case	_
8	2	2	NUM	CD	NumForm=Digit|NumType=Card	9	nummod	9:nummod	_
9	officials	official	NOUN	NNS	Number=Plur	6	obl	6:obl:by	_
10	of	of	ADP	IN	_	12	case	12:case	_
11	the	the	DET	DT	Definite=Def|PronType=Art	12	det	12:det	_
12	Ministry	Ministry	PROPN	NNP	Number=Sing	9	nmod	9:nmod:of	_
13	of	of	ADP	IN	_	15	case	15:case	_
14	the	the	DET	DT	Definite=Def|PronType=Art	15	det	15:det	_
15	Interior	Interior	PROPN	NNP	Number=Sing	12	nmod	12:nmod:of	SpaceAfter=No
16	!	!	PUNCT	.	_	6	punct	6:punct	_

a}  
1	From	from	ADP	IN	_	3	case	3:case	_
2	the	the	DET	DT	Definite=Def|PronType=Art	3	det	3:det	_
3	AP	AP	PROPN	NNP	Number=Sing	4	obl	4:obl:from	_
4	comes	come	VERB	VBZ	Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin	0	root	0:root	_
5	this	this	DET	DT	Number=Sing|PronType=Dem	6	det	6:det	_
6	story	story	NOUN	NN	Number=Sing	4	nsubj	4:nsubj	_
7	:	:	PUNCT	:	_	4	punct	4:punct	_

c                   @   s:   e Zd Zejdddd ZdddZdd	 Zd
d ZdS )TestLemmatizerclassr   c                 C   s   t ddd}td|td}|S )Nr
   testdefault)	model_dir)r   r   r   )selfcharlmcharlm_argsr   r   r   r,   a   s   zTestLemmatizer.charlm_argsNc                 C   s
  t |d }d}t || }t |d }t|ddd}	|	| W d   n1 s+w   Y  t |d }
t|
ddd}	|	| W d   n1 sMw   Y  d	|d
|
d|
d|dddddt |d|ddg}|durp|| }t| tj|s}J tj	|d}|S )zP
        Run the training for a few iterations, load & return the model
        zpred.conlluztest_tagger.ptztrain.conlluwzutf-8)encodingNz
dev.conlluz--train_filez--eval_filez--gold_filez--output_filez--num_epoch2z
--log_step10z
--save_dirz--save_namez--shorthanden_testr   )
stropenwriter   mainr   r   existsr   r   )r*   tmp_path
train_textdev_text
extra_args	pred_file	save_namer"   
train_filefoutdev_fileargssaved_modelr   r   r   run_trainingh   s6   	
zTestLemmatizer.run_trainingc                 C   s   |  |tt dS )F
        Simple test of a few 'epochs' of lemmatizer training
        N)rB   
TRAIN_DATADEV_DATA)r*   r7   r   r   r   test_basic_train   s   zTestLemmatizer.test_basic_trainc                 C   sd   | j |tt|d}|j}tj|d |d }tj|dd dd}t	dd	 |d
 
 D r0J dS )rC   )r:   save_dirr<   c                 S   s   | S Nr   )storagelocr   r   r   <lambda>   r   z2TestLemmatizer.test_charlm_train.<locals>.<lambda>T)weights_onlyc                 s   s    | ]}| d V  qdS )contextual_embeddingN)
startswith).0xr   r   r   	<genexpr>   s    z3TestLemmatizer.test_charlm_train.<locals>.<genexpr>modelN)rB   rD   rE   r@   r   r   r   torchloadanykeys)r*   r7   r,   rA   r@   r<   
checkpointr   r   r   test_charlm_train   s
   "z TestLemmatizer.test_charlm_trainrH   )	__name__
__module____qualname__pytestfixturer,   rB   rF   rX   r   r   r   r   r%   `   s    


"r%   )__doc__r\   r   r   r   rS   stanza.modelsr   stanza.models.lemmar   stanza.testsstanza.utils.training.commonr   r   markpipelinetravis
pytestmarkr]   r   r   r$   lstriprD   rE   r%   r   r   r   r   <module>   s,    

	(*	