o
    h+                     @   s   d Z ddlZddlZddlT ddlmZ ddlmZmZ ddl	m
Z
 ddlmZmZmZmZ dd	 Zd
d Zdd Zdd Zdd Zdd Zdd Zd Zd Zdd Zd Zdd ZdS )z5
A few tests of specific operations from the Dataset
    N)*)tagger)DatasetShuffledDataset)CoNLL)
TRAIN_DATATRAIN_DATA_NO_XPOSTRAIN_DATA_NO_UPOSTRAIN_DATA_NO_FEATSc                  C   sF   t jg d} tjtd}t|| d}|jsJ |jsJ |js!J dS zE
    Test that a dataset with no xpos is detected by the Dataset
    args	input_strN)	r   
parse_argsr   	conll2docr   r   has_uposhas_xpos	has_featsr   	train_docdata r   U/var/www/html/env_mimamsha/lib/python3.10/site-packages/stanza/tests/pos/test_data.pytest_basic_reading      

r   c                  C   sF   t jg d} tjtd}t|| d}|jsJ |jrJ |js!J dS r   )	r   r   r   r   r   r   r   r   r   r   r   r   r   test_no_xpos   r   r   c                  C   sF   t jg d} tjtd}t|| d}|jrJ |jsJ |js!J dS )zE
    Test that a dataset with no upos is detected by the Dataset
    r   r   N)	r   r   r   r   r	   r   r   r   r   r   r   r   r   test_no_upos+   r   r   c                  C   sF   t jg d} tjtd}t|| d}|jsJ |jsJ |jr!J dS )zF
    Test that a dataset with no feats is detected by the Dataset
    r   r   N)	r   r   r   r   r
   r   r   r   r   r   r   r   r   test_no_feats9   r   r   c                  C   sl   t jg dd} tjtd}t|| d}|jdd}tdD ]}|D ]}|jD ]
}|d d	v s1J q'q"qdS )
z\
    Test that with no punct removing augmentation, the doc always has punct at the end
    )--shorthanden_test--augment_nopunct0.0r   r   N   
batch_size2   .!	r   r   r   r   r   r   	to_loaderrangetextr   r   r   ibatchr.   r   r   r   test_no_augmentG      
r2   c                  C   sl   t jg dd} tjtd}t|| d}|jdd}tdD ]}|D ]}|jD ]
}|d d	vs1J q'q"qdS )
z]
    Test that with 100% punct removing augmentation, the doc never has punct at the end
    )r   r    r!   z1.0r   r   Nr#   r$   r&   r'   r(   r+   r/   r   r   r   test_augmentV   r3   r4   c                  C   s   t jg dd} tjtd}t|| d}|jdd}d}d}tdD ]}|D ]}|jD ]}|d	 d
v r8|d7 }q+|d7 }q+q&q"|dksEJ |dksKJ dS )z
    Test 50% punct removing augmentation

    With this frequency, we should get a reasonable number of docs
    with a punct at the end and a reasonable without.
    )r   r    r!   z0.5r   r   Nr#   r$   r   r&   r'   r(         r+   )r   r   r   
count_withcount_withoutr0   r1   r.   r   r   r   test_sometimes_augmente   s    


	r9   z
# text = Noxpos {indexp}
# sent_id = {index}
1	Noxpos	noxpos	NOUN	_	Number=Sing	0	root	_	start_char=0|end_char=8|ner=O
2	{indexp}	{indexp}	NUM	_	NumForm=Digit|NumType=Card	1	dep	_	start_char=9|end_char=10|ner=S-CARDINAL
z
# text = Yesxpos {indexp}
# sent_id = {index}
1	Yesxpos	yesxpos	NOUN	NN	Number=Sing	0	root	_	start_char=0|end_char=8|ner=O
2	{indexp}	{indexp}	NUM	CD	NumForm=Digit|NumType=Card	1	dep	_	start_char=9|end_char=10|ner=S-CARDINAL
c                 C   s  t jg dd}dd tdD }tjd|d}t||d }dd tdD }tjd|d}t||d }t||gd	}td
d |D dksLJ d}	d}
|D ]"}|j	d ur^|	d7 }	n|
d7 }
|	|
 dkrt|	dksnJ |
dkstJ qR|	dks{J |
dksJ d S )Nz--batch_size10r   r    r!   r"   r   c                 S      g | ]}t j||d  dqS )r5   indexindexp)NO_XPOS_TEMPLATEformat.0idxr   r   r   
<listcomp>       z test_shuffle.<locals>.<listcomp>  z

r   c                 S   r<   )e   r=   )YES_XPOS_TEMPLATErA   rB   r   r   r   rE      rF   
   c                 s   s    | ]}d V  qdS )r5   Nr   )rC   _r   r   r   	<genexpr>   s    ztest_shuffle.<locals>.<genexpr>   r   r5   d   )
r   r   r-   r   r   joinr   r   sumxpos)tmp_pathr   no_xposno_docno_datayes_xposyes_docyes_datashufflednum_withnum_withoutr1   r   r   r   test_shuffle   s*   

r\   u  
# sent_id = weblog-blogspot.com_alaindewitt_20040929103700_ENG_20040929_103700-0048
# text = Bush asked for permission to go to Alabama to work on a Senate campaign.
1	Bush	Bush	PROPN	NNP	Number=Sing	2	nsubj	2:nsubj	_
2	asked	ask	VERB	VBD	Mood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin	0	root	0:root	_
3	for	for	ADP	IN	_	4	case	4:case	_
4	permission	permission	NOUN	NN	Number=Sing	2	obl	2:obl:for	_
5	to	to	PART	TO	_	6	mark	6:mark	_
6	go	go	VERB	VB	VerbForm=Inf	4	acl	4:acl:to	_
7	to	to	ADP	IN	_	8	case	8:case	_
8	Alabama	Alabama	PROPN	NNP	Number=Sing	6	obl	6:obl:to	_
9	to	to	PART	TO	_	10	mark	10:mark	_
10	work	work	VERB	VB	VerbForm=Inf	6	advcl	6:advcl:to	_
11	on	on	ADP	IN	_	14	case	14:case	_
12	a	a	DET	DT	Definite=Ind|PronType=Art	14	det	14:det	_
13	Senate	Senate	PROPN	NNP	Number=Sing	14	compound	14:compound	_
14	campaign	campaign	NOUN	NN	Number=Sing	10	obl	10:obl:on	SpaceAfter=No
15	.	.	PUNCT	.	_	2	punct	2:punct	_

# sent_id = weblog-blogspot.com_alaindewitt_20040929103700_ENG_20040929_103700-0049
# text = His superior officers said OK.
1	His	his	PRON	PRP$	Case=Gen|Gender=Masc|Number=Sing|Person=3|Poss=Yes|PronType=Prs	3	nmod:poss	3:nmod:poss	_
2	superior	superior	ADJ	JJ	Degree=Pos	3	amod	3:amod	_
3	officers	officer	NOUN	NNS	Number=Plur	4	nsubj	4:nsubj	_
4	said	say	VERB	VBD	Mood=Ind|Number=Plur|Person=3|Tense=Past|VerbForm=Fin	0	root	0:root	_
5	OK	ok	INTJ	UH	_	4	obj	4:obj	SpaceAfter=No
6	.	.	PUNCT	.	_	4	punct	4:punct	_

# sent_id = weblog-blogspot.com_alaindewitt_20040929103700_ENG_20040929_103700-0053
# text = In ’72 or ’73, if you were a pilot, active or Guard, and you had an obligation and wanted to get out, no problem.
1	In	in	ADP	IN	_	2	case	2:case	_
2	’72	'72	NUM	CD	NumForm=Digit|NumType=Card	10	obl	10:obl:in	_
3	or	or	CCONJ	CC	_	4	cc	4:cc	_
4	’73	'73	NUM	CD	NumForm=Digit|NumType=Card	2	conj	2:conj:or|10:obl:in	SpaceAfter=No
5	,	,	PUNCT	,	_	2	punct	2:punct	_
6	if	if	SCONJ	IN	_	10	mark	10:mark	_
7	you	you	PRON	PRP	Case=Nom|Person=2|PronType=Prs	10	nsubj	10:nsubj	_
8	were	be	AUX	VBD	Mood=Ind|Number=Sing|Person=2|Tense=Past|VerbForm=Fin	10	cop	10:cop	_
9	a	a	DET	DT	Definite=Ind|PronType=Art	10	det	10:det	_
10	pilot	pilot	NOUN	NN	Number=Sing	28	advcl	28:advcl:if	SpaceAfter=No
11	,	,	PUNCT	,	_	12	punct	12:punct	_
12	active	active	ADJ	JJ	Degree=Pos	10	amod	10:amod	_
13	or	or	CCONJ	CC	_	14	cc	14:cc	_
14	Guard	Guard	PROPN	NNP	Number=Sing	12	conj	10:amod|12:conj:or	SpaceAfter=No
15	,	,	PUNCT	,	_	18	punct	18:punct	_
16	and	and	CCONJ	CC	_	18	cc	18:cc	_
17	you	you	PRON	PRP	Case=Nom|Person=2|PronType=Prs	18	nsubj	18:nsubj|22:nsubj|24:nsubj:xsubj	_
18	had	have	VERB	VBD	Mood=Ind|Number=Sing|Person=2|Tense=Past|VerbForm=Fin	10	conj	10:conj:and|28:advcl:if	_
19	an	a	DET	DT	Definite=Ind|PronType=Art	20	det	20:det	_
20	obligation	obligation	NOUN	NN	Number=Sing	18	obj	18:obj	_
21	and	and	CCONJ	CC	_	22	cc	22:cc	_
22	wanted	want	VERB	VBD	Mood=Ind|Number=Sing|Person=2|Tense=Past|VerbForm=Fin	18	conj	18:conj:and	_
23	to	to	PART	TO	_	24	mark	24:mark	_
24	get	get	VERB	VB	VerbForm=Inf	22	xcomp	22:xcomp	_
25	out	out	ADV	RB	_	24	advmod	24:advmod	SpaceAfter=No
26	,	,	PUNCT	,	_	10	punct	10:punct	_
27	no	no	DET	DT	PronType=Neg	28	det	28:det	_
28	problem	problem	NOUN	NN	Number=Sing	0	root	0:root	SpaceAfter=No
29	.	.	PUNCT	.	_	28	punct	28:punct	_

# sent_id = weblog-blogspot.com_alaindewitt_20040929103700_ENG_20040929_103700-0054
# text = In fact, you were helping them solve their problem.”
1	In	in	ADP	IN	_	2	case	2:case	_
2	fact	fact	NOUN	NN	Number=Sing	6	obl	6:obl:in	SpaceAfter=No
3	,	,	PUNCT	,	_	2	punct	2:punct	_
4	you	you	PRON	PRP	Case=Nom|Person=2|PronType=Prs	6	nsubj	6:nsubj	_
5	were	be	AUX	VBD	Mood=Ind|Number=Sing|Person=2|Tense=Past|VerbForm=Fin	6	aux	6:aux	_
6	helping	help	VERB	VBG	Tense=Pres|VerbForm=Part	0	root	0:root	_
7	them	they	PRON	PRP	Case=Acc|Number=Plur|Person=3|PronType=Prs	6	obj	6:obj|8:nsubj:xsubj	_
8	solve	solve	VERB	VB	VerbForm=Inf	6	xcomp	6:xcomp	_
9	their	their	PRON	PRP$	Case=Gen|Number=Plur|Person=3|Poss=Yes|PronType=Prs	10	nmod:poss	10:nmod:poss	_
10	problem	problem	NOUN	NN	Number=Sing	8	obj	8:obj	SpaceAfter=No
11	.	.	PUNCT	.	_	6	punct	6:punct	SpaceAfter=No
12	”	"	PUNCT	''	_	6	punct	6:punct	_

# sent_id = weblog-blogspot.com_alaindewitt_20040929103700_ENG_20040929_103700-0055
# text = So Bush stopped flying.
1	So	so	ADV	RB	_	3	advmod	3:advmod	_
2	Bush	Bush	PROPN	NNP	Number=Sing	3	nsubj	3:nsubj|4:nsubj:xsubj	_
3	stopped	stop	VERB	VBD	Mood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin	0	root	0:root	_
4	flying	fly	VERB	VBG	VerbForm=Ger	3	xcomp	3:xcomp	SpaceAfter=No
5	.	.	PUNCT	.	_	3	punct	3:punct	_
c                  C   s   t jtd} tjg dd}t| |d }|dd}dd |D }|dgks(J |d	d}d
d |D }|ddgks=J |dd}dd |D }|g dksRJ |dd}dd |D }|ddgksgJ |dd}dd |D }|g dks|J d S )Nr   r:   r   r6   rG   c                 S      g | ]}|j qS r   rD   rC   r1   r   r   r   rE         z2test_length_limited_dataloader.<locals>.<listcomp>)r   r5   r#         rb   c                 S   r]   r   r^   r_   r   r   r   rE     r`   )r   r5   r#   ra   rb   r#   c                 S   r]   r   r^   r_   r   r   r   rE     r`   )r   r5   )r#   ra   rc   7   c                 S   r]   r   r^   r_   r   r   r   rE     r`   )r   r5   r#   ra   rb      c                 S   r]   r   r^   r_   r   r   r   rE   !  r`   )rd   )r#   rf   )r   r   
EWT_SAMPLEr   r   r   to_length_limited_loader)sampler   r   dlbatchesr   r   r   test_length_limited_dataloader  s$   rm   )__doc__ospyteststanza.models.common.docstanza.modelsr   stanza.models.pos.datar   r   stanza.utils.conllr   stanza.tests.pos.test_taggerr   r   r	   r
   r   r   r   r   r2   r4   r9   stripr@   rI   r\   lstriprh   rm   r   r   r   r   <module>   s4    #RT