o
    hG@                     @   s   d dl Z d dlmZ e jje jjgZd dlmZ d dl	m
  mZ dZdZdd ZdZd	Zd
d ZdZdZdd ZdZdZdd Zd1ddZdZdd ZdZdZdd Zdd Zd Zd!d" Z d#Z!d$d% Z"d&Z#d'd( Z$d)Z%d*d+ Z&d,' Z(d-) Z*d.Z+d/d0 Z,dS )2    N)compare_ignoring_whitespace)CoNLLa  
# sent_id = 271
# text = Hers is easy to clean.
# previous = What did the dealer like about Alex's car?
# comment = extraction/raising via "tough extraction" and clausal subject
1	Hers	hers	PRON	PRP	Gender=Fem|Number=Sing|Person=3|Poss=Yes|PronType=Prs	3	nsubj	_	_
2	is	be	AUX	VBZ	Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin	3	cop	_	_
3	easy	easy	ADJ	JJ	Degree=Pos	0	root	_	_
4	to	to	PART	TO	_	5	mark	_	_
5	clean	clean	VERB	VB	VerbForm=Inf	3	csubj	_	SpaceAfter=No
6	.	.	PUNCT	.	_	5	punct	_	_
a  
# sent_id = 271
# text = Hers is easy to clean.
# previous = What did the dealer like about Alex's car?
# comment = extraction/raising via "tough extraction" and clausal subject
1	Hers	hers	PRON	PRP	Gender=Fem|Number=Sing|Person=3|Poss=Yes|PronType=Prs	3	nsubj	_	_
2	is	be	AUX	VBZ	Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin	3	cop	_	_
3	easy	easy	ADJ	JJ	Degree=Pos	0	root	_	_
4	to	to	PART	TO	_	5	mark	_	_
5	clean	clean	VERB	VB	VerbForm=Inf	3	advcl	_	SpaceAfter=No
6	.	.	PUNCT	.	_	5	punct	_	_
c                  C   H   d} dg}t jtd}t|| |}t||}d|}t|t d S )Nz!{}=source >nsubj {} >csubj=bad {}z&relabelNamedEdge -edge bad -reln advcl	input_str{:C})	r   	conll2docSAMPLE_DOC_INPUTssurgeonprocess_doc_one_operationconvert_response_to_docformatr   SAMPLE_DOC_EXPECTEDsemgrex_patternssurgeon_editsdocssurgeon_responseupdated_docresult r   \/var/www/html/env_mimamsha/lib/python3.10/site-packages/stanza/tests/server/test_ssurgeon.pytest_ssurgeon_same_length%   s   
r   a  
# text = Jennifer has lovely antennae.
# sent_id = 12
# comment = if you're in to that kind of thing
1	Jennifer	Jennifer	PROPN	NNP	Number=Sing	2	nsubj	_	start_char=0|end_char=8|ner=S-PERSON
2	has	have	VERB	VBZ	Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin	0	root	_	start_char=9|end_char=12|ner=O
3	lovely	lovely	ADJ	JJ	Degree=Pos	4	amod	_	start_char=13|end_char=19|ner=O
4	antennae	antenna	NOUN	NNS	Number=Plur	2	obj	_	start_char=20|end_char=28|ner=O|SpaceAfter=No
5	.	.	PUNCT	.	_	2	punct	_	start_char=28|end_char=29|ner=O
a  
# text = Jennifer has lovely blue antennae.
# sent_id = 12
# comment = if you're in to that kind of thing
1	Jennifer	Jennifer	PROPN	NNP	Number=Sing	2	nsubj	_	ner=S-PERSON
2	has	have	VERB	VBZ	Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin	0	root	_	ner=O
3	lovely	lovely	ADJ	JJ	Degree=Pos	5	amod	_	ner=O
4	blue	blue	ADJ	JJ	_	5	amod	_	ner=O
5	antennae	antenna	NOUN	NNS	Number=Plur	2	obj	_	SpaceAfter=No|ner=O
6	.	.	PUNCT	.	_	2	punct	_	ner=O
c                  C   r   )Nz'{word:antennae}=antennae !> {word:blue}znaddDep -gov antennae -reln amod -word blue -lemma blue -cpos ADJ -pos JJ -ner O -position -antennae -after " "r   r   )	r   r   ADD_WORD_DOC_INPUTr
   r   r   r   r   ADD_WORD_DOC_EXPECTEDr   r   r   r   test_ssurgeon_different_lengthL   s   
r   a  
# sent_id = 25
# text = It's not yours!
# comment = negation 
1	It	it	PRON	PRP	Number=Sing|Person=2|PronType=Prs	4	nsubj	_	SpaceAfter=No
2	's	be	AUX	VBZ	Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin	4	cop	_	_
3	not	not	PART	RB	Polarity=Neg	4	advmod	_	_
4	yours	yours	PRON	PRP	Gender=Neut|Number=Sing|Person=2|Poss=Yes|PronType=Prs	0	root	_	SpaceAfter=No
5	!	!	PUNCT	.	_	4	punct	_	_
a  
# sent_id = 25
# text = It's not yours!
# comment = negation
1-2	It's	_	_	_	_	_	_	_	_
1	It	it	PRON	PRP	Number=Sing|Person=2|PronType=Prs	4	nsubj	_	_
2	's	be	AUX	VBZ	Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin	4	cop	_	_
3	not	not	PART	RB	Polarity=Neg	4	advmod	_	_
4	yours	yours	PRON	PRP	Gender=Neut|Number=Sing|Person=2|Poss=Yes|PronType=Prs	0	root	_	SpaceAfter=No
5	!	!	PUNCT	.	_	4	punct	_	_
c                  C   J   d} ddg}t jtd}t|| |}t||}d|}t|t dS )zN
    Test that converting a document, adding a new MWT, works as expected
    {word:It}=it . {word:/'s/}=sBEditNode -node it -is_mwt true  -is_first_mwt true  -mwt_text It'sBEditNode -node s  -is_mwt true  -is_first_mwt false -mwt_text It'sr   r   N)	r   r   BECOME_MWT_DOC_INPUTr
   r   r   r   r   BECOME_MWT_DOC_EXPECTEDr   r   r   r   test_ssurgeon_become_mwtt   s   
r"   u  
# sent_id = newsgroup-groups.google.com_GayMarriage_0ccbb50b41a5830b_ENG_20050321_181500-0005
# text = One of “NCRC4ME’s”
1	One	one	NUM	CD	NumType=Card	0	root	0:root	_
2	of	of	ADP	IN	_	4	case	4:case	_
3	“	"	PUNCT	``	_	4	punct	4:punct	SpaceAfter=No
4-5	NCRC4ME’s	_	_	_	_	_	_	_	SpaceAfter=No
4	NCRC4ME	NCRC4ME	PROPN	NNP	Number=Sing	1	compound	1:compound	_
5	’s	's	PART	POS	_	4	case	4:case	_
6	”	"	PUNCT	''	_	4	punct	4:punct	_
u  
# sent_id = newsgroup-groups.google.com_GayMarriage_0ccbb50b41a5830b_ENG_20050321_181500-0005
# text = One of “NCRC4ME’s”
1	One	one	NUM	CD	NumType=Card	0	root	_	_
2	of	of	ADP	IN	_	4	case	_	_
3	“	"	PUNCT	``	_	4	punct	_	SpaceAfter=No
4-5	NCRC4ME’s	_	_	_	_	_	_	_	SpaceAfter=No
4	NCRC4ME	NCRC4ME	PROPN	NNP	Number=Sing	1	compound	_	_
5	’s	's	PART	POS	_	4	case	_	_
6	”	"	PUNCT	''	_	4	punct	_	_
c                  C   r   )a  
    Test that converting a document with an MWT works as expected

    Note regarding this test:
    Currently it works because ssurgeon.py doesn't look at the
      "changed" flag because of a bug in EditNode in CoreNLP 4.5.3
    If that is fixed, but the enhanced dependencies aren't fixed,
      this test will fail because the enhanced dependencies *aren't*
      removed.  Fixing the enhanced dependencies as well will fix
      that, though.
    r   r   r   r   r   N)	r   r   EXISTING_MWT_DOC_INPUTr
   r   r   r   r   EXISTING_MWT_DOC_EXPECTEDr   r   r   r   $test_ssurgeon_existing_mwt_no_change   s   
r%   Fc                 C   s|   |d u r| }t j| d}t|g }t||}d|}|r7td t|  td t| td t| t|| d S )Nr   r   INPUTEXPECTEDRESULT)r   r   r
   process_docr   r   printr   )
input_textexpectedechor   r   r   r   r   r   r   check_empty_test   s   
r.   a  
# sent_id = train_78
# text = @user dovrebbe fare pace col cervello
# twittiro = IMPLICIT	ANALOGY
1	@user	@user	SYM	SYM	_	3	nsubj	_	_
2	dovrebbe	dovere	AUX	VM	Mood=Cnd|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin	3	aux	_	_
3	fare	fare	VERB	V	VerbForm=Inf	0	root	_	_
4	pace	pace	NOUN	S	Gender=Fem|Number=Sing	3	obj	_	_
5-6	col	_	_	_	_	_	_	_	_
5	con	con	ADP	E	_	7	case	_	_
6	il	il	DET	RD	Definite=Def|Gender=Masc|Number=Sing|PronType=Art	7	det	_	_
7	cervello	cervello	NOUN	S	Gender=Masc|Number=Sing	3	obl	_	_
c                   C      t t dS )z
    Test that an MWT which is split into pieces which don't make up
    the original token results in a correct #text annotation

    For example, in Italian, "col" splits into "con il", and we want
    the #text to contain "col"
    N)r.   ITALIAN_MWT_INPUTr   r   r   r   test_ssurgeon_mwt_text   s   r1   u1  
# sent_id = train_1114
# text = ““““ buona scuola ““““
# twittiro = EXPLICIT	OTHER
1	“	“	PUNCT	FB	_	6	punct	_	SpaceAfter=No
2	“	“	PUNCT	FB	_	6	punct	_	SpaceAfter=No
3	“	“	PUNCT	FB	_	6	punct	_	SpaceAfter=No
4	“	“	PUNCT	FB	_	6	punct	_	_
5	buona	buono	ADJ	A	Gender=Fem|Number=Sing	6	amod	_	_
6	scuola	scuola	NOUN	S	Gender=Fem|Number=Sing	0	root	_	_
7	“	“	PUNCT	FB	_	6	punct	_	SpaceAfter=No
8	“	“	PUNCT	FB	_	6	punct	_	SpaceAfter=No
9	“	“	PUNCT	FB	_	6	punct	_	SpaceAfter=No
10	“	“	PUNCT	FB	_	6	punct	_	SpacesAfter=\n
u>  
# sent_id = train_1114
# text = ““““ buona scuola ““““
# twittiro = EXPLICIT	OTHER
1	“	“	PUNCT	FB	_	6	punct	_	SpaceAfter=No
2	“	“	PUNCT	FB	_	6	punct	_	SpaceAfter=No
3	“	“	PUNCT	FB	_	6	punct	_	SpaceAfter=No
4	“	“	PUNCT	FB	_	6	punct	_	SpaceAfter=Yes
5	buona	buono	ADJ	A	Gender=Fem|Number=Sing	6	amod	_	_
6	scuola	scuola	NOUN	S	Gender=Fem|Number=Sing	0	root	_	_
7	“	“	PUNCT	FB	_	6	punct	_	SpaceAfter=No
8	“	“	PUNCT	FB	_	6	punct	_	SpaceAfter=No
9	“	“	PUNCT	FB	_	6	punct	_	SpaceAfter=No
10	“	“	PUNCT	FB	_	6	punct	_	SpacesAfter=\n
c                   C   r/   )z
    Test that SpacesAfter goes and comes back the same way

    Tested using some random example from the UD_Italian-TWITTIRO dataset
    N)r.   ITALIAN_SPACES_AFTER_INPUTr   r   r   r   test_ssurgeon_spaces_after_text     r3   c                   C   s   t tt dS )z?
    Test that an unnecessary SpaceAfter=Yes is eliminated
    N)r.   ITALIAN_SPACES_AFTER_YES_INPUTr2   r   r   r   r   test_ssurgeon_spaces_after_yes
  s   r6   ai  
# text = Jennifer has lovely antennae.
# sent_id = 12
# comment = if you're in to that kind of thing
1	Jennifer	_	_	_	Number=Sing	2	nsubj	_	ner=S-PERSON
2	has	_	_	_	Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin	0	root	_	ner=O
3	lovely	_	_	_	Degree=Pos	4	amod	_	ner=O
4	antennae	_	_	_	Number=Plur	2	obj	_	SpaceAfter=No|ner=O
5	.	_	_	_	_	2	punct	_	ner=O
c                   C   r/   z
    Check that various None fields such as lemma & xpos are not turned into blanks

    Tests, like regulations, are often written in blood
    N)r.   EMPTY_VALUES_INPUTr   r   r   r   test_ssurgeon_blank_values  r4   r9   ug  
# sent_id = 1
# text = 你喺度搵乜嘢呀？
1	你	你	PRON	_	_	3	nsubj	_	Translit=nei5|Gloss=2SG|SpaceAfter=No
2	喺度	喺度	ADV	_	_	3	advmod	_	Translit=hai2dou6|Gloss=PROG|SpaceAfter=No
3	搵	搵	VERB	_	_	0	root	_	Translit=wan2|Gloss=find|SpaceAfter=No
4	乜嘢	乜嘢	PRON	_	_	3	obj	_	Translit=mat1je5|Gloss=what|SpaceAfter=No
5	呀	呀	PART	_	_	3	discourse:sp	_	Translit=aa3|Gloss=SFP|SpaceAfter=No
6	？	？	PUNCT	_	_	3	punct	_	SpaceAfter=No

# sent_id = 2
# text = 咪執返啲嘢去阿哥個新屋度囖。
1	咪	咪	ADV	_	_	2	advmod	_	SpaceAfter=No
2	執	執	VERB	_	_	0	root	_	SpaceAfter=No
3	返	返	VERB	_	_	2	compound:dir	_	SpaceAfter=No
4	啲	啲	NOUN	_	NounType=Clf	5	clf:det	_	SpaceAfter=No
5	嘢	嘢	NOUN	_	_	3	obj	_	SpaceAfter=No
6	去	去	VERB	_	_	2	conj	_	SpaceAfter=No
7	阿哥	阿哥	NOUN	_	_	10	nmod	_	SpaceAfter=No
8	個	個	NOUN	_	NounType=Clf	10	clf:det	_	SpaceAfter=No
9	新	新	ADJ	_	_	10	amod	_	SpaceAfter=No
10	屋	屋	NOUN	_	_	6	obj	_	SpaceAfter=No
11	度	度	ADP	_	_	10	case:loc	_	SpaceAfter=No
12	囖	囖	PART	_	_	2	discourse:sp	_	SpaceAfter=No
13	。	。	PUNCT	_	_	2	punct	_	SpaceAfter=No
c                   C   r/   r7   )r.   CANTONESE_MISC_WORDS_INPUTr   r   r   r   test_ssurgeon_misc_wordsA  r4   r;   a  
# sent_id = train_78
# text = @user dovrebbe fare pace colcervello
# twittiro = IMPLICIT	ANALOGY
1	@user	@user	SYM	SYM	_	3	nsubj	_	_
2	dovrebbe	dovere	AUX	VM	Mood=Cnd|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin	3	aux	_	_
3	fare	fare	VERB	V	VerbForm=Inf	0	root	_	_
4	pace	pace	NOUN	S	Gender=Fem|Number=Sing	3	obj	_	_
5-6	col	_	_	_	_	_	_	_	SpaceAfter=No
5	con	con	ADP	E	_	7	case	_	_
6	il	il	DET	RD	Definite=Def|Gender=Masc|Number=Sing|PronType=Art	7	det	_	_
7	cervello	cervello	NOUN	S	Gender=Masc|Number=Sing	3	obl	_	RandomFeature=foo
c                   C   r/   z
    Check the SpaceAfter=No on an MWT (rather than a word)

    the RandomFeature=foo is on account of a silly bug in the initial
    version of passing in MWT misc features
    N)r.   ITALIAN_MWT_SPACE_AFTER_INPUTr   r   r   r   test_ssurgeon_mwt_space_afterW     r>   aa  
# sent_id = train_78
# text = @user dovrebbe farepacecolcervello
# twittiro = IMPLICIT	ANALOGY
1	@user	@user	SYM	SYM	_	3	nsubj	_	_
2	dovrebbe	dovere	AUX	VM	Mood=Cnd|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin	3	aux	_	_
3-4	farepace	_	_	_	_	_	_	_	Players=GonnaPlay|SpaceAfter=No
3	fare	fare	VERB	V	VerbForm=Inf	0	root	_	_
4	pace	pace	NOUN	S	Gender=Fem|Number=Sing	3	obj	_	_
5-6	col	_	_	_	_	_	_	_	Haters=GonnaHate|SpaceAfter=No
5	con	con	ADP	E	_	7	case	_	_
6	il	il	DET	RD	Definite=Def|Gender=Masc|Number=Sing|PronType=Art	7	det	_	_
7	cervello	cervello	NOUN	S	Gender=Masc|Number=Sing	3	obl	_	RandomFeature=foo
c                   C   r/   r<   )r.   ITALIAN_MWT_MISC_INPUTr   r   r   r   test_ssurgeon_mwt_misco  r?   rA   u`  
# sent_id = 1
# text = غلام رهڻ سان ماڻهو منافق ٿئي ٿو .
1	غلام	غلام	NOUN	NN__اسم	Case=Acc|Gender=Masc|Number=Sing|Person=3	2	compound	_	_
2	رهڻ	ره	VERB	VB__فعل	Number=Sing	6	advcl	_	_
3	سان	سان	ADP	IN__حرفِ_جر	Number=Sing	2	mark	_	_
4	ماڻهو	ماڻهو	NOUN	NN__اسم	Case=Nom|Gender=Masc|Number=Sing|Person=3	6	nsubj	_	_
5	منافق	منافق	ADJ	JJ__صفت	Case=Acc|Number=Sing|Person=3	6	xcomp	_	_
6	ٿئي	ٿي	VERB	VB__فعل	Number=Sing	_	_	_	_
7	ٿو	ٿو	AUX	VB__فعل	Number=Sing	6	aux	_	_
8	.	.	PUNCT	-__پورو_دم	_	6	punct	_	_
uc  
# sent_id = 1
# text = غلام رهڻ سان ماڻهو منافق ٿئي ٿو .
1	غلام	غلام	NOUN	NN__اسم	Case=Acc|Gender=Masc|Number=Sing|Person=3	2	compound	_	_
2	رهڻ	ره	VERB	VB__فعل	Number=Sing	6	advcl	_	_
3	سان	سان	ADP	IN__حرفِ_جر	Number=Sing	2	mark	_	_
4	ماڻهو	ماڻهو	NOUN	NN__اسم	Case=Nom|Gender=Masc|Number=Sing|Person=3	6	nsubj	_	_
5	منافق	منافق	ADJ	JJ__صفت	Case=Acc|Number=Sing|Person=3	6	xcomp	_	_
6	ٿئي	ٿي	VERB	VB__فعل	Number=Sing	0	root	_	_
7	ٿو	ٿو	AUX	VB__فعل	Number=Sing	6	aux	_	_
8	.	.	PUNCT	-__پورو_دم	_	6	punct	_	_
z
{}=root !< {}
setRoots root
c                  C   sx   t t} t jddgddddg}| |ksJ tjtd}t || }t || }t 	||}d
|}|tks:J d	S )
zF
    A user / contributor sent a dependency file with blank roots
    z{}=root !< {}zsetRoots root1 UniversalEnglish)r   r   ssurgeon_idnoteslanguager   r   N)r
   parse_ssurgeon_editsSINDHI_EDITSsurgeonEditr   r   SINDHI_ROOT_EXAMPLEbuild_requestr)   r   r   SINDHI_ROOT_EXPECTED)editsexpected_editsblank_dep_docrequestresponser   r   r   r   r   "test_ssurgeon_rewrite_sindhi_roots  s   

rS   )NF)-pyteststanza.testsr   marktravisclient
pytestmarkstanza.utils.conllr   stanza.server.ssurgeonserverr
   r	   r   r   r   r   r   r    r!   r"   r#   r$   r%   r.   r0   r1   r2   r5   r3   r6   r8   r9   r:   r;   r=   r>   r@   rA   lstriprK   striprM   rI   rS   r   r   r   r   <module>   sP    

		