o
    hX:                     @   s   d Z ddlZddlZddlmZ ddlmZ ddlT ddlm	Z	m
Z
 ejjejjgZdZg dZd	 Zd
 Zd Zd Zd ZG dd dZdS )z'
Basic testing of the English pipeline
    N)CoNLLDocument)*)check_on_gpucheck_on_cpuz\Barack Obama was born in Hawaii.  He was elected president in 2008.  Obama attended Harvard.)z Barack Obama was born in Hawaii.z!He was elected president in 2008.zObama attended Harvard.a^  
<Token id=1;words=[<Word id=1;text=Barack;lemma=Barack;upos=PROPN;xpos=NNP;feats=Number=Sing;head=4;deprel=nsubj:pass>]>
<Token id=2;words=[<Word id=2;text=Obama;lemma=Obama;upos=PROPN;xpos=NNP;feats=Number=Sing;head=1;deprel=flat>]>
<Token id=3;words=[<Word id=3;text=was;lemma=be;upos=AUX;xpos=VBD;feats=Mood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin;head=4;deprel=aux:pass>]>
<Token id=4;words=[<Word id=4;text=born;lemma=bear;upos=VERB;xpos=VBN;feats=Tense=Past|VerbForm=Part|Voice=Pass;head=0;deprel=root>]>
<Token id=5;words=[<Word id=5;text=in;lemma=in;upos=ADP;xpos=IN;head=6;deprel=case>]>
<Token id=6;words=[<Word id=6;text=Hawaii;lemma=Hawaii;upos=PROPN;xpos=NNP;feats=Number=Sing;head=4;deprel=obl>]>
<Token id=7;words=[<Word id=7;text=.;lemma=.;upos=PUNCT;xpos=.;head=4;deprel=punct>]>

<Token id=1;words=[<Word id=1;text=He;lemma=he;upos=PRON;xpos=PRP;feats=Case=Nom|Gender=Masc|Number=Sing|Person=3|PronType=Prs;head=3;deprel=nsubj:pass>]>
<Token id=2;words=[<Word id=2;text=was;lemma=be;upos=AUX;xpos=VBD;feats=Mood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin;head=3;deprel=aux:pass>]>
<Token id=3;words=[<Word id=3;text=elected;lemma=elect;upos=VERB;xpos=VBN;feats=Tense=Past|VerbForm=Part|Voice=Pass;head=0;deprel=root>]>
<Token id=4;words=[<Word id=4;text=president;lemma=president;upos=NOUN;xpos=NN;feats=Number=Sing;head=3;deprel=xcomp>]>
<Token id=5;words=[<Word id=5;text=in;lemma=in;upos=ADP;xpos=IN;head=6;deprel=case>]>
<Token id=6;words=[<Word id=6;text=2008;lemma=2008;upos=NUM;xpos=CD;feats=NumForm=Digit|NumType=Card;head=3;deprel=obl>]>
<Token id=7;words=[<Word id=7;text=.;lemma=.;upos=PUNCT;xpos=.;head=3;deprel=punct>]>

<Token id=1;words=[<Word id=1;text=Obama;lemma=Obama;upos=PROPN;xpos=NNP;feats=Number=Sing;head=2;deprel=nsubj>]>
<Token id=2;words=[<Word id=2;text=attended;lemma=attend;upos=VERB;xpos=VBD;feats=Mood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin;head=0;deprel=root>]>
<Token id=3;words=[<Word id=3;text=Harvard;lemma=Harvard;upos=PROPN;xpos=NNP;feats=Number=Sing;head=2;deprel=obj>]>
<Token id=4;words=[<Word id=4;text=.;lemma=.;upos=PUNCT;xpos=.;head=2;deprel=punct>]>
a  
<Word id=1;text=Barack;lemma=Barack;upos=PROPN;xpos=NNP;feats=Number=Sing;head=4;deprel=nsubj:pass>
<Word id=2;text=Obama;lemma=Obama;upos=PROPN;xpos=NNP;feats=Number=Sing;head=1;deprel=flat>
<Word id=3;text=was;lemma=be;upos=AUX;xpos=VBD;feats=Mood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin;head=4;deprel=aux:pass>
<Word id=4;text=born;lemma=bear;upos=VERB;xpos=VBN;feats=Tense=Past|VerbForm=Part|Voice=Pass;head=0;deprel=root>
<Word id=5;text=in;lemma=in;upos=ADP;xpos=IN;head=6;deprel=case>
<Word id=6;text=Hawaii;lemma=Hawaii;upos=PROPN;xpos=NNP;feats=Number=Sing;head=4;deprel=obl>
<Word id=7;text=.;lemma=.;upos=PUNCT;xpos=.;head=4;deprel=punct>

<Word id=1;text=He;lemma=he;upos=PRON;xpos=PRP;feats=Case=Nom|Gender=Masc|Number=Sing|Person=3|PronType=Prs;head=3;deprel=nsubj:pass>
<Word id=2;text=was;lemma=be;upos=AUX;xpos=VBD;feats=Mood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin;head=3;deprel=aux:pass>
<Word id=3;text=elected;lemma=elect;upos=VERB;xpos=VBN;feats=Tense=Past|VerbForm=Part|Voice=Pass;head=0;deprel=root>
<Word id=4;text=president;lemma=president;upos=NOUN;xpos=NN;feats=Number=Sing;head=3;deprel=xcomp>
<Word id=5;text=in;lemma=in;upos=ADP;xpos=IN;head=6;deprel=case>
<Word id=6;text=2008;lemma=2008;upos=NUM;xpos=CD;feats=NumForm=Digit|NumType=Card;head=3;deprel=obl>
<Word id=7;text=.;lemma=.;upos=PUNCT;xpos=.;head=3;deprel=punct>

<Word id=1;text=Obama;lemma=Obama;upos=PROPN;xpos=NNP;feats=Number=Sing;head=2;deprel=nsubj>
<Word id=2;text=attended;lemma=attend;upos=VERB;xpos=VBD;feats=Mood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin;head=0;deprel=root>
<Word id=3;text=Harvard;lemma=Harvard;upos=PROPN;xpos=NNP;feats=Number=Sing;head=2;deprel=obj>
<Word id=4;text=.;lemma=.;upos=PUNCT;xpos=.;head=2;deprel=punct>
a  
('Barack', 4, 'nsubj:pass')
('Obama', 1, 'flat')
('was', 4, 'aux:pass')
('born', 0, 'root')
('in', 6, 'case')
('Hawaii', 4, 'obl')
('.', 4, 'punct')

('He', 3, 'nsubj:pass')
('was', 3, 'aux:pass')
('elected', 0, 'root')
('president', 3, 'xcomp')
('in', 6, 'case')
('2008', 3, 'obl')
('.', 3, 'punct')

('Obama', 2, 'nsubj')
('attended', 0, 'root')
('Harvard', 2, 'obj')
('.', 2, 'punct')
a  
# text = Barack Obama was born in Hawaii.
# sent_id = 0
# constituency = (ROOT (S (NP (NNP Barack) (NNP Obama)) (VP (VBD was) (VP (VBN born) (PP (IN in) (NP (NNP Hawaii))))) (. .)))
# sentiment = 1
1	Barack	Barack	PROPN	NNP	Number=Sing	4	nsubj:pass	_	start_char=0|end_char=6|ner=B-PERSON
2	Obama	Obama	PROPN	NNP	Number=Sing	1	flat	_	start_char=7|end_char=12|ner=E-PERSON
3	was	be	AUX	VBD	Mood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin	4	aux:pass	_	start_char=13|end_char=16|ner=O
4	born	bear	VERB	VBN	Tense=Past|VerbForm=Part|Voice=Pass	0	root	_	start_char=17|end_char=21|ner=O
5	in	in	ADP	IN	_	6	case	_	start_char=22|end_char=24|ner=O
6	Hawaii	Hawaii	PROPN	NNP	Number=Sing	4	obl	_	start_char=25|end_char=31|ner=S-GPE|SpaceAfter=No
7	.	.	PUNCT	.	_	4	punct	_	start_char=31|end_char=32|ner=O|SpacesAfter=\s\s

# text = He was elected president in 2008.
# sent_id = 1
# constituency = (ROOT (S (NP (PRP He)) (VP (VBD was) (VP (VBN elected) (S (NP (NN president))) (PP (IN in) (NP (CD 2008))))) (. .)))
# sentiment = 1
1	He	he	PRON	PRP	Case=Nom|Gender=Masc|Number=Sing|Person=3|PronType=Prs	3	nsubj:pass	_	start_char=34|end_char=36|ner=O
2	was	be	AUX	VBD	Mood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin	3	aux:pass	_	start_char=37|end_char=40|ner=O
3	elected	elect	VERB	VBN	Tense=Past|VerbForm=Part|Voice=Pass	0	root	_	start_char=41|end_char=48|ner=O
4	president	president	NOUN	NN	Number=Sing	3	xcomp	_	start_char=49|end_char=58|ner=O
5	in	in	ADP	IN	_	6	case	_	start_char=59|end_char=61|ner=O
6	2008	2008	NUM	CD	NumForm=Digit|NumType=Card	3	obl	_	start_char=62|end_char=66|ner=S-DATE|SpaceAfter=No
7	.	.	PUNCT	.	_	3	punct	_	start_char=66|end_char=67|ner=O|SpacesAfter=\s\s

# text = Obama attended Harvard.
# sent_id = 2
# constituency = (ROOT (S (NP (NNP Obama)) (VP (VBD attended) (NP (NNP Harvard))) (. .)))
# sentiment = 1
1	Obama	Obama	PROPN	NNP	Number=Sing	2	nsubj	_	start_char=69|end_char=74|ner=S-PERSON
2	attended	attend	VERB	VBD	Mood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin	0	root	_	start_char=75|end_char=83|ner=O
3	Harvard	Harvard	PROPN	NNP	Number=Sing	2	obj	_	start_char=84|end_char=91|ner=S-ORG|SpaceAfter=No
4	.	.	PUNCT	.	_	2	punct	_	start_char=91|end_char=92|ner=O|SpaceAfter=No
a  
# text = Barack Obama was born in Hawaii.
# sent_id = 0
# constituency = (ROOT (S (NP (NNP Barack) (NNP Obama)) (VP (VBD was) (VP (VBN born) (PP (IN in) (NP (NNP Hawaii))))) (. .)))
# sentiment = 1
1	Barack	Barack	PROPN	NNP	Number=Sing	4	nsubj:pass	_	start_char=0|end_char=6|ner=B-PERSON
2	Obama	Obama	PROPN	NNP	Number=Sing	1	flat	_	start_char=7|end_char=12|ner=E-PERSON
3	was	be	AUX	VBD	Mood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin	4	aux:pass	_	start_char=13|end_char=16|ner=O
4	born	bear	VERB	VBN	Tense=Past|VerbForm=Part|Voice=Pass	0	root	_	start_char=17|end_char=21|ner=O
5	in	in	ADP	IN	_	6	case	_	start_char=22|end_char=24|ner=O
6	Hawaii	Hawaii	PROPN	NNP	Number=Sing	4	obl	_	start_char=25|end_char=31|ner=S-GPE|SpaceAfter=No
7	.	.	PUNCT	.	_	4	punct	_	start_char=31|end_char=32|ner=O|SpaceAfter=No

# text = He was elected president in 2008.
# sent_id = 1
# constituency = (ROOT (S (NP (PRP He)) (VP (VBD was) (VP (VBN elected) (S (NP (NN president))) (PP (IN in) (NP (CD 2008))))) (. .)))
# sentiment = 1
1	He	he	PRON	PRP	Case=Nom|Gender=Masc|Number=Sing|Person=3|PronType=Prs	3	nsubj:pass	_	start_char=0|end_char=2|ner=O
2	was	be	AUX	VBD	Mood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin	3	aux:pass	_	start_char=3|end_char=6|ner=O
3	elected	elect	VERB	VBN	Tense=Past|VerbForm=Part|Voice=Pass	0	root	_	start_char=7|end_char=14|ner=O
4	president	president	NOUN	NN	Number=Sing	3	xcomp	_	start_char=15|end_char=24|ner=O
5	in	in	ADP	IN	_	6	case	_	start_char=25|end_char=27|ner=O
6	2008	2008	NUM	CD	NumForm=Digit|NumType=Card	3	obl	_	start_char=28|end_char=32|ner=S-DATE|SpaceAfter=No
7	.	.	PUNCT	.	_	3	punct	_	start_char=32|end_char=33|ner=O|SpaceAfter=No

# text = Obama attended Harvard.
# sent_id = 2
# constituency = (ROOT (S (NP (NNP Obama)) (VP (VBD attended) (NP (NNP Harvard))) (. .)))
# sentiment = 1
1	Obama	Obama	PROPN	NNP	Number=Sing	2	nsubj	_	start_char=0|end_char=5|ner=S-PERSON
2	attended	attend	VERB	VBD	Mood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin	0	root	_	start_char=6|end_char=14|ner=O
3	Harvard	Harvard	PROPN	NNP	Number=Sing	2	obj	_	start_char=15|end_char=22|ner=S-ORG|SpaceAfter=No
4	.	.	PUNCT	.	_	2	punct	_	start_char=22|end_char=23|ner=O|SpaceAfter=No
c                   @   s   e Zd Zejdddd Zejdddd Zdd Zd	d
 Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zejdddd Zdd Zdd Zdd  Zd!d" Zd#d$ Zejddd%d& Zd'd( Zd)d* Zd+d, Zd-d. Zd/S )0TestEnglishPipelineclass)scopec                 C   s   t jtdS )N)dir)stanzaPipelineTEST_MODELS_DIR)self r   f/var/www/html/env_mimamsha/lib/python3.10/site-packages/stanza/tests/pipeline/test_english_pipeline.pypipeline   s   zTestEnglishPipeline.pipelinec                 C   s   |t S )F Document created by running full English pipeline on a few sentences )EN_DOCr   r   r   r   r   processed_doc   s   z!TestEnglishPipeline.processed_docc                 C   s   |j tksJ d S )N)textr   r   r   r   r   r   	test_text   s   zTestEnglishPipeline.test_textc                 C   s   d |tks	J d S )N{:C})formatEN_DOC_CONLLU_GOLDr   r   r   r   test_conllu   s   zTestEnglishPipeline.test_conlluc                 C   "   d dd |jD tksJ d S )N

c                 S      g | ]}|  qS r   )tokens_string.0sentr   r   r   
<listcomp>       z3TestEnglishPipeline.test_tokens.<locals>.<listcomp>)join	sentencesEN_DOC_TOKENS_GOLDr   r   r   r   test_tokens      "zTestEnglishPipeline.test_tokensc                 C   r   )Nr   c                 S   r    r   )words_stringr"   r   r   r   r%      r&   z2TestEnglishPipeline.test_words.<locals>.<listcomp>)r'   r(   EN_DOC_WORDS_GOLDr   r   r   r   
test_words   r+   zTestEnglishPipeline.test_wordsc                 C   r   )Nr   c                 S   r    r   )dependencies_stringr"   r   r   r   r%      r&   z=TestEnglishPipeline.test_dependency_parse.<locals>.<listcomp>)r'   r(   EN_DOC_DEPENDENCY_PARSES_GOLDr   r   r   r   test_dependency_parse   s   z)TestEnglishPipeline.test_dependency_parsec                 C   s   |d |d d S )N z--r   r   r   r   r   
test_empty   s   zTestEnglishPipeline.test_emptyc                 C   s^   | t}ddd |D tksJ dd tD }| |}ddd |D tks-J dS )zV Double check that the bulk_process method in Pipeline converts documents as expected r   c                 S      g | ]}d  |qS r   r   r#   docr   r   r   r%          z9TestEnglishPipeline.test_bulk_process.<locals>.<listcomp>c                 S      g | ]}t g |d qS )r   r   r#   tr   r   r   r%          c                 S   r4   r5   r6   r7   r   r   r   r%      r9   N)bulk_processEN_DOCSr'   EN_DOC_CONLLU_GOLD_MULTIDOC)r   r   	processeddocsr   r   r   test_bulk_process   s
   

 z%TestEnglishPipeline.test_bulk_processc                 C   s   | g }|g ksJ dS )zJ Previously we had a bug where an empty document list would cause a crash N)r?   r   r   rB   r   r   r   test_empty_bulk_process   s   
z+TestEnglishPipeline.test_empty_bulk_processc                 C   s   dd | tD }ddd |D tksJ dd | ttD }ddd |D tks2J dd |j tdd	D }d
d |D }d|tksNJ dS )z. Test the streaming interface to the Pipeline c                 S      g | ]}|qS r   r   r7   r   r   r   r%          z3TestEnglishPipeline.test_stream.<locals>.<listcomp>r   c                 S   r4   r5   r6   r7   r   r   r   r%      r9   c                 S   rG   r   r   r7   r   r   r   r%      rH   c                 S   r4   r5   r6   r7   r   r   r   r%      r9   c                 S   rG   r   r   r7   r   r   r   r%      rH      )
batch_sizec                 S   r4   r5   r6   r7   r   r   r   r%      r9   N)streamr@   r'   rA   iterrE   r   r   r   test_stream   s   zTestEnglishPipeline.test_streamc                 C   s   dd t D }||S )r   c                 S   r:   r;   r   r<   r   r   r   r%      r>   z:TestEnglishPipeline.processed_multidoc.<locals>.<listcomp>)r@   )r   r   rC   r   r   r   processed_multidoc   s   z&TestEnglishPipeline.processed_multidocc                 C       d dd |D tksJ d S )Nr   c                 S   r4   r5   r6   r7   r   r   r   r%      r9   z<TestEnglishPipeline.test_conllu_multidoc.<locals>.<listcomp>)r'   rA   r   rN   r   r   r   test_conllu_multidoc       z(TestEnglishPipeline.test_conllu_multidocc                 C   rO   )Nr   c                 S       g | ]}|j D ]}| qqS r   )r(   r!   r#   r   r$   r   r   r   r%           z<TestEnglishPipeline.test_tokens_multidoc.<locals>.<listcomp>)r'   r)   rP   r   r   r   test_tokens_multidoc   rR   z(TestEnglishPipeline.test_tokens_multidocc                 C   rO   )Nr   c                 S   rS   r   )r(   r,   rT   r   r   r   r%      rU   z;TestEnglishPipeline.test_words_multidoc.<locals>.<listcomp>)r'   r-   rP   r   r   r   test_words_multidoc   rR   z'TestEnglishPipeline.test_words_multidocc                 C   s2   dd |D }t |D ]\}}||jksJ qd S )Nc                 S   s   g | ]
}|j D ]}|qqS r   )r(   )r#   r8   r$   r   r   r   r%      s    zFTestEnglishPipeline.test_sentence_indices_multidoc.<locals>.<listcomp>)	enumerateindex)r   rN   r(   sent_idxsentencer   r   r   test_sentence_indices_multidoc   s   z2TestEnglishPipeline.test_sentence_indices_multidocc                 C   rO   )Nr   c                 S   rS   r   r(   r/   rT   r   r   r   r%      rU   zFTestEnglishPipeline.test_dependency_parse_multidoc.<locals>.<listcomp>r'   r0   rP   r   r   r   test_dependency_parse_multidoc      z2TestEnglishPipeline.test_dependency_parse_multidocc                 C   s(   dd t D }tjtddid}||S )r   c                 S   r:   r;   r   r<   r   r   r   r%      r>   zBTestEnglishPipeline.processed_multidoc_variant.<locals>.<listcomp>tokenizespacyr   
processors)r@   r   r   r   )r   rC   nlpr   r   r   processed_multidoc_variant   s   z.TestEnglishPipeline.processed_multidoc_variantc                 C   rO   )Nr   c                 S   rS   r   r]   rT   r   r   r   r%     rU   zNTestEnglishPipeline.test_dependency_parse_multidoc_variant.<locals>.<listcomp>r^   )r   rf   r   r   r   &test_dependency_parse_multidoc_variant  r`   z:TestEnglishPipeline.test_dependency_parse_multidoc_variantc                 C   s2   t jtdd}|d}t|jd jdksJ d S )Nztokenize,pos,constituencyrc   zThis is a testr   z=(ROOT (S (NP (DT This)) (VP (VBZ is) (NP (DT a) (NN test))))))r   r   r   strr(   constituency)r   re   r8   r   r   r   test_constituency_parser  s   z,TestEnglishPipeline.test_constituency_parserc                 C   s   t | dS )zL
        The default pipeline should have all the models on the GPU
        N)r   r   r   r   r   test_on_gpu  s   zTestEnglishPipeline.test_on_gpuc                 C   s   t jdtdd}t| dS )zP
        Create a pipeline on the CPU, check that all the models on CPU
        enF)r   use_gpuN)r   r   r   r   r   r   r   r   test_on_cpu  s   zTestEnglishPipeline.test_on_cpuN)__name__
__module____qualname__pytestfixturer   r   r   r   r*   r.   r1   r3   rD   rF   rM   rN   rQ   rV   rW   r\   r_   rf   rg   rj   rk   rn   r   r   r   r   r      s6    







r   )__doc__rr   r   stanza.utils.conllr   stanza.models.common.docr   stanza.tests+stanza.tests.pipeline.pipeline_device_testsr   r   markr   travis
pytestmarkr   r@   stripr)   r-   r0   r   rA   r   r   r   r   r   <module>   s4    !#!#