o
    h8Z                     @   sj  d Z ddlZddlZddlZddlmZ ddlZddlmZ ddl	m
Z
 ddlT ejjZg dg dg d	g d
g dg dg dg dg dg dg
gZddddddddddddddddd dd!d"d#d$d%dd&d'dd(d)d*d+d,d-d.d/dd0d1d1d2d3dd4d5dd6d7d8d9d:d;d;d<d=d>d?d@d*d*d+dAd=d.dBdCdDdDd2dEd-dFdGddHdIdIdJddKdLdMg
gZdNdO ZdPdQ ZdRdS ZdT ZdUdVgZdWdXgZdYdZ Zd[d\ Zd] Zd^d_ Zd`da Zdbdc Zddde Zdfdg Zdhdi Z dj Z!dkdl Z"dm Z#dndo Z$dp Z%dqdr Z&ds Z'dt Z(dudv Z)dwdx Z*dydz Z+d{ Z,d|d} Z-d~ Z.dd Z/d0 Z1dd Z2d0 Z3dd Z4dS )z$
Basic tests of the data conversion
    N)ZipFile)CoNLL)Document)*)
1NousilPRON_!Number=Plur|Person=1|PronType=Prs3nsubjr
   start_char=0|end_char=4)
2avonsavoirAUXr
   5Mood=Ind|Number=Plur|Person=1|Tense=Pres|VerbForm=Finr   	aux:tenser
   start_char=5|end_char=10)
r   atteint	atteindreVERBr
   0Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part0rootr
   start_char=11|end_char=18)
4laleDETr
   0Definite=Def|Gender=Fem|Number=Sing|PronType=Art5detr
   start_char=19|end_char=21)
r"   finr%   NOUNr
   Gender=Fem|Number=Singr   objr
   start_char=22|end_char=25)
z6-7dur
   r
   r
   r
   r
   r
   r
   start_char=26|end_char=28)
6der-   ADPr
   r
   8caser
   r
   )
7r   r   r    r
   1Definite=Def|Gender=Masc|Number=Sing|PronType=Artr/   r#   r
   r
   )
r/   sentierr3   r&   r
   Gender=Masc|Number=Singr"   nmodr
   start_char=29|end_char=36)
9.r8   PUNCTr
   r
   r   punctr
   start_char=36|end_char=37)   r   r   r	   r      r   r   )idtextlemmauposfeatsheaddeprelmisc)   r   r   r   r   r   r   )r=   r   r   r   r   r   r   )   r   r   r    r!      r#   r$   )rH   r%   r&   r'   r(   r)   )      r*   r+   )r>   r?   rE   )rI   r-   r.      r0   )r>   r?   r@   rA   rC   rD   )rJ   r2   )r>   r?   r@   rA   rB   rC   rD   )rK   r3   r4   r5   r6   )	   r8   r9   r:   r;   )r>   r?   r@   rA   rC   rD   rE   c                  C   sH   t t\} }| tksJ t| t|ksJ tdd |D s"J d S )Nc                 s   s    | ]	}t |d kV  qdS )r   N)len).0x rP   c/var/www/html/env_mimamsha/lib/python3.10/site-packages/stanza/tests/common/test_data_conversion.py	<genexpr>-   s    z%test_conll_to_dict.<locals>.<genexpr>)r   convert_conllCONLLDICTrM   all)dictsemptyrP   rP   rQ   test_conll_to_dict)   s   rY   c                  C   s2   t t} dd d| dD }|tksJ d S )Nc                 S       g | ]}d d | dD qS )c                 S      g | ]}| d qS 	splitrN   sentencerP   rP   rQ   
<listcomp>2       z1test_dict_to_conll.<locals>.<listcomp>.<listcomp>
r^   rN   docrP   rP   rQ   rb   2        z&test_dict_to_conll.<locals>.<listcomp>{:c}

)r   rU   formatr_   rT   )documentconllrP   rP   rQ   test_dict_to_conll/   s   rm   c                  C   sB   t t} |  }t |} dd d| dD }|tksJ dS )aX  
    Test the conversion from raw dict to Document and back

    This code path will first turn start_char|end_char into start_char & end_char fields in the Document
    That version to a dict will have separate fields for each of those
    Finally, the conversion from that dict to a list of conll entries should convert that back to misc
    c                 S   rZ   )c                 S   r[   r\   r^   r`   rP   rP   rQ   rb   @   rc   z?test_dict_to_doc_and_doc_to_dict.<locals>.<listcomp>.<listcomp>rd   r^   re   rP   rP   rQ   rb   @   rg   z4test_dict_to_doc_and_doc_to_dict.<locals>.<listcomp>rh   ri   N)r   rU   to_dictrj   r_   rT   )rk   rW   rl   rP   rP   rQ    test_dict_to_doc_and_doc_to_dict5   s
   ro   uj  
# sent_id = yandex.reviews-f-8xh5zqnmwak3t6p68y4rhwd4e0-1969-9253
# genre = review
# text = Как- то слишком мало цветов получают актёры после спектакля.
1	Как	как-то	ADV	_	Degree=Pos|PronType=Ind	7	advmod	_	SpaceAfter=No
2	-	-	PUNCT	_	_	3	punct	_	_
3	то	то	PART	_	_	1	list	_	deprel=list:goeswith
4	слишком	слишком	ADV	_	Degree=Pos	5	advmod	_	_
5	мало	мало	ADV	_	Degree=Pos	6	advmod	_	_
6	цветов	цветок	NOUN	_	Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur	7	obj	_	_
7	получают	получать	VERB	_	Aspect=Imp|Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin|Voice=Act	0	root	_	_
8	актёры	актер	NOUN	_	Animacy=Anim|Case=Nom|Gender=Masc|Number=Plur	7	nsubj	_	_
9	после	после	ADP	_	_	10	case	_	_
10	спектакля	спектакль	NOUN	_	Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing	7	obl	_	SpaceAfter=No
11	.	.	PUNCT	_	_	7	punct	_	_

# sent_id = 4
# genre = social
# text = В женщине важна верность, а не красота.
1	В	в	ADP	_	_	2	case	_	_
2	женщине	женщина	NOUN	_	Animacy=Anim|Case=Loc|Gender=Fem|Number=Sing	3	obl	_	_
3	важна	важный	ADJ	_	Degree=Pos|Gender=Fem|Number=Sing|Variant=Short	0	root	_	_
4	верность	верность	NOUN	_	Animacy=Inan|Case=Nom|Gender=Fem|Number=Sing	3	nsubj	_	SpaceAfter=No
5	,	,	PUNCT	_	_	8	punct	_	_
6	а	а	CCONJ	_	_	8	cc	_	_
7	не	не	PART	_	Polarity=Neg	8	advmod	_	_
8	красота	красота	NOUN	_	Animacy=Inan|Case=Nom|Gender=Fem|Number=Sing	4	conj	_	SpaceAfter=No
9	.	.	PUNCT	_	_	3	punct	_	_
un   Как- то слишком мало цветов получают актёры после спектакля.uF   В женщине важна верность, а не красота.z5yandex.reviews-f-8xh5zqnmwak3t6p68y4rhwd4e0-1969-9253r   c                 C   s  t d}t| jdksJ |d | jd jd ksJ |d | jd jd ks*J |d | jd jd ks8J tttt| jD ]+\}\}}}||j	ksOJ ||j
ksVJ ||jks]J t|jdksfJ | rlJ qAd| }|d}t|dksJ |d d}t|dksJ |d |d ksJ |d |d ksJ |d |d ksJ | jd jd jdksJ | jd jd jd	ksJ d
S )z_
    Refactored the test for the Russian doc so we can use it to test various file methods
    rd   rF   r   r<   r=   {:C}ri      zlist:goeswithN)RUSSIAN_SAMPLEr_   rM   	sentencescomments	enumeratezipRUSSIAN_TEXTRUSSIAN_IDSr?   sent_idindexhas_enhanced_dependenciesrj   wordsrC   rD   )rf   linessent_idxexpected_textexpected_idra   rs   rP   rP   rQ   check_russian_doce   s*   
 

r   c                 C   s   | d }t jtd}t| t || t|dd}| }W d   n1 s)w   Y  |ds5J | }||	dd }tt	dd }||ksQJ t |}t| dS )z6
    Specifically test the write_doc2conll method
    russian.conll	input_strutf-8encodingNri   z# sent_id = 4)
r   	conll2docrr   r   write_doc2conllopenreadendswithstripfind)tmp_pathfilenamerf   r%   r?   sampledoc2rP   rP   rQ   test_write_russian_doc   s   

r   a{  
# newdoc
# sent_id = 1
# text = It is hers.
# previous = Which person owns this?
# comment = copular subject
1	It	it	PRON	PRP	Number=Sing|Person=3|PronType=Prs	3	nsubj	_	_
2	is	be	AUX	VBZ	Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin	3	cop	_	_
3	hers	hers	PRON	PRP	Gender=Fem|Number=Sing|Person=3|Poss=Yes|PronType=Prs	0	root	_	SpaceAfter=No
4	.	.	PUNCT	.	_	3	punct	_	_
c                  C   sJ   t jtd} t }t | | | }|dsJ | tks#J d S )Nr   ri   )	r   r   ENGLISH_SAMPLEioStringIOr   getvaluer   r   )rf   outputoutput_valuerP   rP   rQ   test_write_to_io   s   r   c                 C   s   t jtd}| d }t || t j||dd t|}| }W d    n1 s+w   Y  td t d }||ks>J d S )Nr   zenglish.conlla)moderi   )r   r   r   r   r   r   )r   rf   r   r%   r?   expectedrP   rP   rQ   test_write_doc2conll_append   s   

r   c                  C   s   t jtd} t|  dS )zI
    Test that a doc with comments gets converted back with comments
    r   N)r   r   rr   r   rf   rP   rP   rQ   test_doc_with_comments   s   r   c                  C   s   t jtd} d| d}t|dksJ |d d}t|dks%J |D ]&}|d}t|d	ks<t|d
ks<J t|d
krMtdd |D sMJ q'dS )z
    The above RUSSIAN_SAMPLE resulted in a blank misc field in one particular implementation of the conll code
    (the below test would fail)
    r   rp   ri   rF   r   rd   rq   r]   r<   
   c                 s   s    | ]}|V  qd S )NrP   )rN   piecerP   rP   rQ   rR      s    z$test_unusual_misc.<locals>.<genexpr>N)r   r   rr   rj   r_   rM   rV   )rf   rs   ra   wordpiecesrP   rP   rQ   test_unusual_misc   s   
r   c               	   C   s   t  6} tj| d}t|ddd}|t W d   n1 s#w   Y  tj	|d}t
| W d   dS 1 s=w   Y  dS )z(
    Test loading a doc from a file
    r   wr   r   N)
input_file)tempfileTemporaryDirectoryospathjoinr   writerr   r   r   r   )tempdirr   foutrf   rP   rP   rQ   	test_file   s   

"r   c               
   C   s   t  O} tj| d}d}t|d%}||d}|t	  W d   n1 s,w   Y  W d   n1 s;w   Y  t
j||d}t| W d   dS 1 sVw   Y  dS )z,
    Test loading a doc from a zip file
    zrussian.zipr   r   N)r   zip_file)r   r   r   r   r   r   r   r   rr   encoder   r   r   )r   r   r   zoutr   rf   rP   rP   rQ   test_zip_file   s   

"r   a|  
# text = Teferi's best friend is Karn
# sent_id = 0
1	Teferi	_	_	_	_	0	_	_	start_char=0|end_char=6|ner=S-PERSON
2	's	_	_	_	_	1	_	_	start_char=6|end_char=8|ner=O
3	best	_	_	_	_	2	_	_	start_char=9|end_char=13|ner=O
4	friend	_	_	_	_	3	_	_	start_char=14|end_char=20|ner=O
5	is	_	_	_	_	4	_	_	start_char=21|end_char=23|ner=O
6	Karn	_	_	_	_	5	_	_	start_char=24|end_char=28|ner=S-PERSON
c                  C   s   t jtd} t| jdksJ | jd }t|jdksJ g d}t|j|D ]!\}}|j|ks2J |jr7J t|j	dks@J |j	d jrHJ q'd
| }|tksTJ dS )z=
    Test that tokens get properly created with NER tags
    r   r<   r   rI   )S-PERSONOr   r   r   r   rp   N)r   r   
SIMPLE_NERrM   rs   tokensrv   nerrE   r|   rj   )rf   ra   EXPECTED_NERtokenr   rl   rP   rP   rQ   test_simple_ner_conversion   s   


r   a|  
# text = This makes John's headache worse
# sent_id = 0
1	This	_	_	_	_	0	_	_	start_char=0|end_char=4|ner=O
2	makes	_	_	_	_	1	_	_	start_char=5|end_char=10|ner=O
3-4	John's	_	_	_	_	_	_	_	start_char=11|end_char=17|ner=S-PERSON
3	John	_	_	_	_	2	_	_	_
4	's	_	_	_	_	3	_	_	_
5	headache	_	_	_	_	4	_	_	start_char=18|end_char=26|ner=O
6	worse	_	_	_	_	5	_	_	start_char=27|end_char=32|ner=O
c                  C   s   t jtd} t| jdksJ | jd }t|jdksJ | r#J g d}g d}t|j||D ]"\}}}|j|ks>J |j	rCJ t|j
|ksLJ |j
d j	rTJ q2d| }|tks`J dS )	z
    Test that tokens including MWT get properly created with NER tags

    Note that this kind of thing happens with the EWT tokenizer for English, for example
    r   r<   r   rH   )r   r   r   r   r   )r<   r<   rF   r<   r<   rp   N)r   r   MWT_NERrM   rs   r   r{   rv   r   rE   r|   rj   )rf   ra   r   EXPECTED_WORDSr   r   expected_wordsrl   rP   rP   rQ   test_mwt_ner_conversion  s   


r   u  
# newpar
# sent_id = aia_foorum_37
# text = Sestpeale ei mõistagi neid, kes koduaias sortidega tegelevad.
1	Sestpeale	sest_peale	ADV	D	_	3	advmod	3:advmod	_
2	ei	ei	AUX	V	Polarity=Neg	3	aux	3:aux	_
3	mõistagi	mõistma	VERB	V	Connegative=Yes|Mood=Ind|Tense=Pres|VerbForm=Fin|Voice=Act	0	root	0:root	_
4	neid	tema	PRON	P	Case=Par|Number=Plur|Person=3|PronType=Prs	3	obj	3:obj|9:nsubj	SpaceAfter=No
5	,	,	PUNCT	Z	_	9	punct	9:punct	_
6	kes	kes	PRON	P	Case=Nom|Number=Plur|PronType=Int,Rel	9	nsubj	4:ref	_
7	koduaias	kodu_aed	NOUN	S	Case=Ine|Number=Sing	9	obl	9:obl	_
8	sortidega	sort	NOUN	S	Case=Com|Number=Plur	9	obl	9:obl	_
9	tegelevad	tegelema	VERB	V	Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin|Voice=Act	4	acl:relcl	4:acl	SpaceAfter=No
10	.	.	PUNCT	Z	_	3	punct	3:punct	_
c                  C   s~   t jtd} t| jdksJ | jd }t|jdksJ | s#J | jd jd }|jdks2J d	| }|tks=J d S )Nr   r<   r   r   r=   z3:obj|9:nsubjrp   )
r   r   ESTONIAN_DEPSrM   rs   r   r{   r|   depsrj   )rf   ra   r   rl   rP   rP   rQ   test_deps_conversionG  s   

r   uG  
# sent_id = ewtb2_000035_15
# text = Ja paari aasta pärast rôômalt maasikatele ...
1	Ja	ja	CCONJ	J	_	3	cc	5.1:cc	_
2	paari	paar	NUM	N	Case=Gen|Number=Sing|NumForm=Word|NumType=Card	3	nummod	3:nummod	_
3	aasta	aasta	NOUN	S	Case=Gen|Number=Sing	0	root	5.1:obl	_
4	pärast	pärast	ADP	K	AdpType=Post	3	case	3:case	_
5	rôômalt	rõõmsalt	ADV	D	Typo=Yes	3	advmod	5.1:advmod	Orphan=Yes|CorrectForm=rõõmsalt
5.1	panna	panema	VERB	V	VerbForm=Inf	_	_	0:root	Empty=5.1
6	maasikatele	maasikas	NOUN	S	Case=All|Number=Plur	3	obl	5.1:obl	Orphan=Yes
7	...	...	PUNCT	Z	_	3	punct	5.1:punct	_
u  
# sent_id = ewtb2_000035_15
# text = Ja paari aasta pärast rôômalt maasikatele ...
1	Ja	ja	CCONJ	J	_	3	cc	5.1:cc	_
2	paari	paar	NUM	N	Case=Gen|Number=Sing|NumForm=Word|NumType=Card	3	nummod	3:nummod	_
3	aasta	aasta	NOUN	S	Case=Gen|Number=Sing	0	root	5.1:obl	_
4	pärast	pärast	ADP	K	AdpType=Post	3	case	3:case	_
5	rôômalt	rõõmsalt	ADV	D	Typo=Yes	3	advmod	5.1:advmod	Orphan=Yes|CorrectForm=rõõmsalt
5.1	panna	panema	VERB	V	VerbForm=Inf	_	_	0:root	Empty=5.1
c                   C      t td dS )zS
    Check that we can read and then output a sentence with empty dependencies
    rJ   N)check_empty_deps_conversionESTONIAN_EMPTY_DEPSrP   rP   rP   rQ   test_empty_deps_conversionl     r   c                   C   r   )zS
    The empty deps conversion should also work if the empty dep is at the end
    rH   N)r   ESTONIAN_EMPTY_END_DEPSrP   rP   rP   rQ   !test_empty_deps_at_end_conversionr  r   r   c                 C   s  t j| dd}t|jdksJ t|jd j|ksJ t|jd j|ks(J t|jd jdks4J |jd }d|}|| ksDJ |jd  }t||d ksUJ |d d dks_J t|jdkshJ t|jd j|kstJ t|jd j|ksJ t|jd jdksJ d S )	NFr   ignore_gappingr<   r   rp   rH   r>   )rH   r<   )	r   r   rM   rs   r   r|   empty_wordsrj   rn   )r   r   rf   ra   rl   sentence_dictrP   rP   rQ   r   x  s   

r   u`  
# doc_id = this_is_a_doc
# sent_id = ewtb2_000035_15
# text = Ja paari aasta pärast rôômalt maasikatele ...
1	Ja	ja	CCONJ	J	_	3	cc	5.1:cc	_
2	paari	paar	NUM	N	Case=Gen|Number=Sing|NumForm=Word|NumType=Card	3	nummod	3:nummod	_
3	aasta	aasta	NOUN	S	Case=Gen|Number=Sing	0	root	5.1:obl	_
4	pärast	pärast	ADP	K	AdpType=Post	3	case	3:case	_
5	rôômalt	rõõmsalt	ADV	D	Typo=Yes	3	advmod	5.1:advmod	Orphan=Yes|CorrectForm=rõõmsalt
5.1	panna	panema	VERB	V	VerbForm=Inf	_	_	0:root	Empty=5.1
6	maasikatele	maasikas	NOUN	S	Case=All|Number=Plur	3	obl	5.1:obl	Orphan=Yes
7	...	...	PUNCT	Z	_	3	punct	5.1:punct	_
c                  C   s8   t jtdd} d| tksJ | jd jdksJ d S )NFr   rp   r   this_is_a_doc)r   r   ESTONIAN_DOC_IDrj   rs   doc_idr   rP   rP   rQ   test_read_doc_id  s   r   a  
# text = Teferi's best friend is Karn
# sent_id = 0
# notes = this sentence has a dependency index outside the sentence.  it should throw an IndexError
1	Teferi	_	_	_	_	0	root	_	start_char=0|end_char=6|ner=S-PERSON
2	's	_	_	_	_	1	dep	_	start_char=6|end_char=8|ner=O
3	best	_	_	_	_	2	dep	_	start_char=9|end_char=13|ner=O
4	friend	_	_	_	_	3	dep	_	start_char=14|end_char=20|ner=O
5	is	_	_	_	_	4	dep	_	start_char=21|end_char=23|ner=O
6	Karn	_	_	_	_	8	dep	_	start_char=24|end_char=28|ner=S-PERSON
c                  C   s<   t t tjtd} W d    d S 1 sw   Y  d S )Nr   )pytestraises
IndexErrorr   r   SIMPLE_DEPENDENCY_INDEX_ERRORr   rP   rP   rQ   test_read_dependency_errors  s   "r   au
  
# doc_id = doc_1
# sent_id = weblog-juancole.com_juancole_20051126063000_ENG_20051126_063000-0020
# text = His mother was also killed in the attack.
1	His	his	PRON	PRP$	Case=Gen|Gender=Masc|Number=Sing|Person=3|Poss=Yes|PronType=Prs	2	nmod:poss	2:nmod:poss	_
2	mother	mother	NOUN	NN	Number=Sing	5	nsubj:pass	5:nsubj:pass	_
3	was	be	AUX	VBD	Mood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin	5	aux:pass	5:aux:pass	_
4	also	also	ADV	RB	_	5	advmod	5:advmod	_
5	killed	kill	VERB	VBN	Tense=Past|VerbForm=Part|Voice=Pass	0	root	0:root	_
6	in	in	ADP	IN	_	8	case	8:case	_
7	the	the	DET	DT	Definite=Def|PronType=Art	8	det	8:det	_
8	attack	attack	NOUN	NN	Number=Sing	5	obl	5:obl:in	SpaceAfter=No
9	.	.	PUNCT	.	_	5	punct	5:punct	_

# doc_id = doc_1
# sent_id = weblog-juancole.com_juancole_20051126063000_ENG_20051126_063000-0028
# text = This item is a small one and easily missed.
1	This	this	DET	DT	Number=Sing|PronType=Dem	2	det	2:det	_
2	item	item	NOUN	NN	Number=Sing	6	nsubj	6:nsubj|9:nsubj:pass	_
3	is	be	AUX	VBZ	Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin	6	cop	6:cop	_
4	a	a	DET	DT	Definite=Ind|PronType=Art	6	det	6:det	_
5	small	small	ADJ	JJ	Degree=Pos	6	amod	6:amod	_
6	one	one	NOUN	NN	Number=Sing	0	root	0:root	_
7	and	and	CCONJ	CC	_	9	cc	9:cc	_
8	easily	easily	ADV	RB	_	9	advmod	9:advmod	_
9	missed	miss	VERB	VBN	Tense=Past|VerbForm=Part|Voice=Pass	6	conj	6:conj:and	SpaceAfter=No
10	.	.	PUNCT	.	_	6	punct	6:punct	_

# doc_id = doc_2
# sent_id = weblog-juancole.com_juancole_20051126063000_ENG_20051126_063000-0029
# text = But in my view it is highly significant.
1	But	but	CCONJ	CC	_	8	cc	8:cc	_
2	in	in	ADP	IN	_	4	case	4:case	_
3	my	my	PRON	PRP$	Case=Gen|Number=Sing|Person=1|Poss=Yes|PronType=Prs	4	nmod:poss	4:nmod:poss	_
4	view	view	NOUN	NN	Number=Sing	8	obl	8:obl:in	_
5	it	it	PRON	PRP	Case=Nom|Gender=Neut|Number=Sing|Person=3|PronType=Prs	8	nsubj	8:nsubj	_
6	is	be	AUX	VBZ	Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin	8	cop	8:cop	_
7	highly	highly	ADV	RB	_	8	advmod	8:advmod	_
8	significant	significant	ADJ	JJ	Degree=Pos	0	root	0:root	SpaceAfter=No
9	.	.	PUNCT	.	_	8	punct	8:punct	_

# sent_id = weblog-juancole.com_juancole_20051126063000_ENG_20051126_063000-0040
# text = The trial begins again Nov.28.
1	The	the	DET	DT	Definite=Def|PronType=Art	2	det	2:det	_
2	trial	trial	NOUN	NN	Number=Sing	3	nsubj	3:nsubj	_
3	begins	begin	VERB	VBZ	Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin	0	root	0:root	_
4	again	again	ADV	RB	_	3	advmod	3:advmod	_
5	Nov.	November	PROPN	NNP	Abbr=Yes|Number=Sing	3	obl:tmod	3:obl:tmod	SpaceAfter=No
6	28	28	NUM	CD	NumForm=Digit|NumType=Card	5	nummod	5:nummod	SpaceAfter=No
7	.	.	PUNCT	.	_	3	punct	3:punct	_

c                  C   s   t jtd} t| dksJ t| d jdksJ t| d jdks$J dtddd  }t j|d} t| dks>J t| d jdksIJ t| d jdksTJ t| d jdks_J d S )Nr   rF   r   r<   rd   r=   )r   conll2multi_docsMULTIPLE_DOC_IDSrM   rs   r   r_   )docsr?   rP   rP   rQ   test_read_multiple_doc_ids  s   r   at  
# text = This is a test
# sent_id = 0
1	This	this	PRON	DT	Number=Sing|PronType=Dem	4	nsubj	_	start_char=0|end_char=4
2	is	be	AUX	VBZ	Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin	4	cop	_	start_char=5|end_char=7
3	a	a	DET	DT	Definite=Ind|PronType=Art	4	det	_	start_char=8|end_char=9
4	test	test	NOUN	NN	Number=Sing	0	root	_	start_char=10|end_char=14|SpaceAfter=No
c                  C   sH   t jtd} t |  }g dg dg dg dgg}||ks"J d S )Nr   )
r   Thisthisr	   DTzNumber=Sing|PronType=Demr   r   r
   r   )
r   isber   VBZz5Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Finr   copr
   zstart_char=5|end_char=7)
r   r   r   r    r   zDefinite=Ind|PronType=Artr   r#   r
   zstart_char=8|end_char=9)
r   testr   r&   NNzNumber=Singr   r   r
   z'SpaceAfter=No|start_char=10|end_char=14)r   r   ENGLISH_TEST_SENTENCEconvert_dictrn   )rf   	convertedr   rP   rP   rQ   test_convert_dict  s   r   )5__doc__r   r   r   zipfiler   stanzastanza.utils.conllr   stanza.models.common.docr   stanza.testsmarkpipeline
pytestmarkrT   rU   rY   rm   ro   r   rr   rw   rx   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   lstripr   r   r   r   rP   rP   rP   rQ   <module>   s    

	
	
35	