o
    h^                     @   sZ   d Z ddlZddlZddlmZ ddlT ddlmZmZ ej	j
ZdZdZG dd	 d	ZdS )
z
Basic testing of French pipeline

The benefit of this test is to verify that the bulk processing works
for languages with MWT in them
    NDocument)*)check_on_gpucheck_on_cpuu   Alors encore inconnu du grand public, Emmanuel Macron devient en 2014 ministre de l'Économie, de l'Industrie et du Numérique.u)  
[
  [
    {
      "id": 1,
      "text": "Alors",
      "lemma": "alors",
      "upos": "ADV",
      "head": 3,
      "deprel": "advmod",
      "start_char": 0,
      "end_char": 5
    },
    {
      "id": 2,
      "text": "encore",
      "lemma": "encore",
      "upos": "ADV",
      "head": 3,
      "deprel": "advmod",
      "start_char": 6,
      "end_char": 12
    },
    {
      "id": 3,
      "text": "inconnu",
      "lemma": "inconnu",
      "upos": "ADJ",
      "feats": "Gender=Masc|Number=Sing",
      "head": 11,
      "deprel": "advcl",
      "start_char": 13,
      "end_char": 20
    },
    {
      "id": [
        4,
        5
      ],
      "text": "du",
      "start_char": 21,
      "end_char": 23
    },
    {
      "id": 4,
      "text": "de",
      "lemma": "de",
      "upos": "ADP",
      "head": 7,
      "deprel": "case"
    },
    {
      "id": 5,
      "text": "le",
      "lemma": "le",
      "upos": "DET",
      "feats": "Definite=Def|Gender=Masc|Number=Sing|PronType=Art",
      "head": 7,
      "deprel": "det"
    },
    {
      "id": 6,
      "text": "grand",
      "lemma": "grand",
      "upos": "ADJ",
      "feats": "Gender=Masc|Number=Sing",
      "head": 7,
      "deprel": "amod",
      "start_char": 24,
      "end_char": 29
    },
    {
      "id": 7,
      "text": "public",
      "lemma": "public",
      "upos": "NOUN",
      "feats": "Gender=Masc|Number=Sing",
      "head": 3,
      "deprel": "obl:arg",
      "start_char": 30,
      "end_char": 36,
      "misc": "SpaceAfter=No"
    },
    {
      "id": 8,
      "text": ",",
      "lemma": ",",
      "upos": "PUNCT",
      "head": 3,
      "deprel": "punct",
      "start_char": 36,
      "end_char": 37
    },
    {
      "id": 9,
      "text": "Emmanuel",
      "lemma": "Emmanuel",
      "upos": "PROPN",
      "head": 11,
      "deprel": "nsubj",
      "start_char": 38,
      "end_char": 46
    },
    {
      "id": 10,
      "text": "Macron",
      "lemma": "Macron",
      "upos": "PROPN",
      "head": 9,
      "deprel": "flat:name",
      "start_char": 47,
      "end_char": 53
    },
    {
      "id": 11,
      "text": "devient",
      "lemma": "devenir",
      "upos": "VERB",
      "feats": "Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin",
      "head": 0,
      "deprel": "root",
      "start_char": 54,
      "end_char": 61
    },
    {
      "id": 12,
      "text": "en",
      "lemma": "en",
      "upos": "ADP",
      "head": 13,
      "deprel": "case",
      "start_char": 62,
      "end_char": 64
    },
    {
      "id": 13,
      "text": "2014",
      "lemma": "2014",
      "upos": "NUM",
      "feats": "Number=Plur",
      "head": 11,
      "deprel": "obl:mod",
      "start_char": 65,
      "end_char": 69
    },
    {
      "id": 14,
      "text": "ministre",
      "lemma": "ministre",
      "upos": "NOUN",
      "feats": "Gender=Masc|Number=Sing",
      "head": 11,
      "deprel": "xcomp",
      "start_char": 70,
      "end_char": 78
    },
    {
      "id": 15,
      "text": "de",
      "lemma": "de",
      "upos": "ADP",
      "head": 17,
      "deprel": "case",
      "start_char": 79,
      "end_char": 81
    },
    {
      "id": 16,
      "text": "l'",
      "lemma": "le",
      "upos": "DET",
      "feats": "Definite=Def|Number=Sing|PronType=Art",
      "head": 17,
      "deprel": "det",
      "start_char": 82,
      "end_char": 84,
      "misc": "SpaceAfter=No"
    },
    {
      "id": 17,
      "text": "Économie",
      "lemma": "économie",
      "upos": "NOUN",
      "feats": "Gender=Fem|Number=Sing",
      "head": 14,
      "deprel": "nmod",
      "start_char": 84,
      "end_char": 92,
      "misc": "SpaceAfter=No"
    },
    {
      "id": 18,
      "text": ",",
      "lemma": ",",
      "upos": "PUNCT",
      "head": 21,
      "deprel": "punct",
      "start_char": 92,
      "end_char": 93
    },
    {
      "id": 19,
      "text": "de",
      "lemma": "de",
      "upos": "ADP",
      "head": 21,
      "deprel": "case",
      "start_char": 94,
      "end_char": 96
    },
    {
      "id": 20,
      "text": "l'",
      "lemma": "le",
      "upos": "DET",
      "feats": "Definite=Def|Number=Sing|PronType=Art",
      "head": 21,
      "deprel": "det",
      "start_char": 97,
      "end_char": 99,
      "misc": "SpaceAfter=No"
    },
    {
      "id": 21,
      "text": "Industrie",
      "lemma": "industrie",
      "upos": "NOUN",
      "feats": "Gender=Fem|Number=Sing",
      "head": 17,
      "deprel": "conj",
      "start_char": 99,
      "end_char": 108
    },
    {
      "id": 22,
      "text": "et",
      "lemma": "et",
      "upos": "CCONJ",
      "head": 25,
      "deprel": "cc",
      "start_char": 109,
      "end_char": 111
    },
    {
      "id": [
        23,
        24
      ],
      "text": "du",
      "start_char": 112,
      "end_char": 114
    },
    {
      "id": 23,
      "text": "de",
      "lemma": "de",
      "upos": "ADP",
      "head": 25,
      "deprel": "case"
    },
    {
      "id": 24,
      "text": "le",
      "lemma": "le",
      "upos": "DET",
      "feats": "Definite=Def|Gender=Masc|Number=Sing|PronType=Art",
      "head": 25,
      "deprel": "det"
    },
    {
      "id": 25,
      "text": "Numérique",
      "lemma": "numérique",
      "upos": "NOUN",
      "feats": "Gender=Masc|Number=Sing",
      "head": 17,
      "deprel": "conj",
      "start_char": 115,
      "end_char": 124,
      "misc": "SpaceAfter=No"
    },
    {
      "id": 26,
      "text": ".",
      "lemma": ".",
      "upos": "PUNCT",
      "head": 11,
      "deprel": "punct",
      "start_char": 124,
      "end_char": 125,
      "misc": "SpaceAfter=No"
    }
  ]
]
c                   @   s@   e Zd Zejdddd Zdd Zdd Zd	d
 Zdd Z	dS )TestFrenchPipelineclass)scopec                 C   s   t jdtdd}|S )z& Create a pipeline with French models ztokenize,mwt,pos,lemma,depparsefr)
processorsdirlang)stanzaPipelineTEST_MODELS_DIRselfpipeline r   e/var/www/html/env_mimamsha/lib/python3.10/site-packages/stanza/tests/pipeline/test_french_pipeline.pyr   >  s   zTestFrenchPipeline.pipelinec                 C   s   |t }tt|t d S )N)FR_MWT_SENTENCEcompare_ignoring_whitespacestrEXPECTED_RESULT)r   r   docr   r   r   test_singleD  s   zTestFrenchPipeline.test_singlec                 C   s~   d}t g| }dd |D }||}t||ksJ |D ] }tt|t t|jdks.J |jdks5J |jdks<J qd S )N
   c                 S   s   g | ]}t g |d qS ))textr   ).0
doccontentr   r   r   
<listcomp>K  s    z0TestFrenchPipeline.test_bulk.<locals>.<listcomp>         )r   lenr   r   r   	sentences	num_words
num_tokens)r   r   NUM_DOCSraw_textraw_docresultr   r   r   r   	test_bulkH  s   
zTestFrenchPipeline.test_bulkc                 C   s   t | dS )zL
        The default pipeline should have all the models on the GPU
        N)r   r   r   r   r   test_on_gpuV  s   zTestFrenchPipeline.test_on_gpuc                 C   s   t jdtdd}t| dS )zP
        Create a pipeline on the CPU, check that all the models on CPU
        r
   F)r   use_gpuN)r   r   r   r   r   r   r   r   test_on_cpu\  s   zTestFrenchPipeline.test_on_cpuN)
__name__
__module____qualname__pytestfixturer   r   r,   r-   r/   r   r   r   r   r   =  s    

r   )__doc__r3   r   stanza.models.common.docr   stanza.tests+stanza.tests.pipeline.pipeline_device_testsr   r   markr   
pytestmarkr   r   r   r   r   r   r   <module>   s      *