o
    h                     @   sp   d Z ddlZddlZddlmZ ddlmZ ejj	Z
dZdZdZdd Zd	d
 ZdZdZdZdd Zdd ZdS )z
Misc tests for the server
    N)compare_ignoring_whitespacezJoe Smith lives in California.a  
Sentence #1 (6 tokens):
Joe Smith lives in California.

Tokens:
[Text=Joe CharacterOffsetBegin=0 CharacterOffsetEnd=3 PartOfSpeech=NNP Lemma=Joe NamedEntityTag=PERSON]
[Text=Smith CharacterOffsetBegin=4 CharacterOffsetEnd=9 PartOfSpeech=NNP Lemma=Smith NamedEntityTag=PERSON]
[Text=lives CharacterOffsetBegin=10 CharacterOffsetEnd=15 PartOfSpeech=VBZ Lemma=live NamedEntityTag=O]
[Text=in CharacterOffsetBegin=16 CharacterOffsetEnd=18 PartOfSpeech=IN Lemma=in NamedEntityTag=O]
[Text=California CharacterOffsetBegin=19 CharacterOffsetEnd=29 PartOfSpeech=NNP Lemma=California NamedEntityTag=STATE_OR_PROVINCE]
[Text=. CharacterOffsetBegin=29 CharacterOffsetEnd=30 PartOfSpeech=. Lemma=. NamedEntityTag=O]

Dependency Parse (enhanced plus plus dependencies):
root(ROOT-0, lives-3)
compound(Smith-2, Joe-1)
nsubj(lives-3, Smith-2)
case(California-5, in-4)
obl:in(lives-3, California-5)
punct(lives-3, .-6)

Extracted the following NER entity mentions:
Joe Smith       PERSON  PERSON:0.9972202681743931
California      STATE_OR_PROVINCE       LOCATION:0.9990868267559281

Extracted the following KBP triples:
1.0     Joe Smith       per:statesorprovinces_of_residence      California
a  
Sentence #1 (6 tokens):
Joe Smith lives in California.

Tokens:
[Text=Joe CharacterOffsetBegin=0 CharacterOffsetEnd=3 PartOfSpeech=NNP]
[Text=Smith CharacterOffsetBegin=4 CharacterOffsetEnd=9 PartOfSpeech=NNP]
[Text=lives CharacterOffsetBegin=10 CharacterOffsetEnd=15 PartOfSpeech=VBZ]
[Text=in CharacterOffsetBegin=16 CharacterOffsetEnd=18 PartOfSpeech=IN]
[Text=California CharacterOffsetBegin=19 CharacterOffsetEnd=29 PartOfSpeech=NNP]
[Text=. CharacterOffsetBegin=29 CharacterOffsetEnd=30 PartOfSpeech=.]
c                  C   s   t jddd} | jtddd}t|t W d   n1 sw   Y  t jddd} | jtdd	}t|t W d   dS 1 sCw   Y  dS )
zd Test case of starting server with Spanish defaults, and then requesting default English properties spanishtest_spanish_english_request)
properties	server_idenglishtext)r   output_formatNtest_english_request)r	   )corenlpCoreNLPClientannotateEN_DOCr   EN_DOC_GOLD)clientann r   _/var/www/html/env_mimamsha/lib/python3.10/site-packages/stanza/tests/server/test_server_misc.pyr
   8   s   "r
   c               	   C   s   t jddg dd0} t jddg dd}|t}W d   n1 s%w   Y  W d   dS W d   dS 1 s=w   Y  dS )	z
    Test case of creating a client with start_server=False and a set of annotators
    The annotators should be used instead of the server's default annotators
    test_default_annotatorsr   )tokenizessplitposlemmanerdepparse)r   r	   
annotatorsF)r   r   r   )start_serverr	   r   N)r   r   r   r   )r   client2r   r   r   r   r   E   s   "r   )r            )      )	      )      )r   r    )r#   
   )      )      u   I am 𝒚̂𝒊 random textc                  C   s   t jddgddidH} | t}ttttD ]2\}\}}|jd j	| }|j
|d ks/J |j|d ks8J |j|d ksAJ |j|d ksJJ qW d   dS 1 sVw   Y  dS )	z? Test case of asking for codepoints from the English tokenizer r   r   tokenize.codepointtruer   r   r   r   N)r   r   r   codepoint_doc	enumeratezipexpected_codepointsexpected_characterssentencetokencodepointOffsetBegincodepointOffsetEnd	beginCharendChar)r   r   i
codepoints
charactersr7   r   r   r   test_codepointsV   s   

"r?   c                  C   s   d} t jddgddidJ}|| }|jd jd j}|jd jd j}| || }|d	ks1J |jd
 jd j}|jd
 jd j}| || }|dksOJ W d   dS 1 sZw   Y  dS )zD Test case of extracting the correct sentence text using codepoints u0   Unban mox opal 🐱.  This is a second sentence.r   r   r.   r/   r0   r   u   Unban mox opal 🐱.r   zThis is a second sentence.N)r   r   r   r6   r7   r8   r9   )r   r   r   
text_starttext_endsentence_textr   r   r   test_codepoint_textb   s   

"rD   )__doc__pytestrestanza.serverserverr   stanza.testsr   markr   
pytestmarkr   r   EN_DOC_POS_ONLY_GOLDr
   r   r4   r5   r1   r?   rD   r   r   r   r   <module>   s     