o
    hU                     @   sF  d dl Z d dlZd dlm  m  mZ d dlmZ d dlm	Z	m
Z
 d dlmZ g dg dg dgZded  d	d
ed d	ded d	gZg dZded  ed  dd
ed ed dded ed dgZejdddd Zejdddd Zejdddd Zejdddd Zejdddd ZG dd dZdS )    N)WVType)PADUNK)Tree)IhatetheOpalbanning)Tellmywifehello)r   likezSh'reyanz'santennae0)	sentimenttext1   2   )zL(ROOT (S (NP (PRP I)) (VP (VBP hate) (NP (DT the) (NN Opal) (NN banning)))))zB(ROOT (S (VP (VB Tell) (NP (PRP$ my) (NN wife)) (NP (UH hello)))))zX(ROOT (S (NP (PRP I)) (VP (VBP like) (NP (NP (NNP Sh'reyan) (POS 's)) (NNS antennae))))))r   r   constituencymodule)scopec                 C   Z   t d }| dd }t|ddd}tj||dd W d    |S 1 s&w   Y  |S )	N   dataz
train.jsonwutf-8encodingFensure_asciiDATASETmktempopenjsondumptmp_path_factory	train_settrain_filenamefout r/   ]/var/www/html/env_mimamsha/lib/python3.10/site-packages/stanza/tests/classifiers/test_data.py
train_file!      
r1   c                 C   r   )	Nr   r   zdev.jsonr   r   r    Fr"   r$   r+   dev_setdev_filenamer.   r/   r/   r0   dev_file)   r2   r6   c                 C   sV   t }| dd }t|ddd}tj||dd W d    |S 1 s$w   Y  |S )Nr   z	test.jsonr   r   r    Fr"   r$   )r+   test_settest_filenamer.   r/   r/   r0   	test_file1   s   
r9   c                 C   r   )	Nr   r   ztrain_trees.jsonr   r   r    Fr"   DATASET_WITH_TREESr&   r'   r(   r)   r*   r/   r/   r0   train_file_with_trees9   r2   r<   c                 C   r   )	Nr   r   zdev_trees.jsonr   r   r    Fr"   r:   r3   r/   r/   r0   dev_file_with_treesA   r2   r=   c                   @   s<   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd ZdS )TestClassifierDatac                 C   s(   t t|tjd}t|dksJ dS z1
        Test reading of the json format
        r   <   N)r   read_datasetstrr   OTHERlen)selfr1   r,   r/   r/   r0   test_read_dataJ   s   z!TestClassifierData.test_read_datac                 C   s|   t t|tjd}t|dksJ t|D ]\}}t|jt	s"J t|jt
|tt
  ks1J qt t|tjd}dS r?   )r   rA   rB   r   rC   rD   	enumerate
isinstancer   r   TREES)rE   r1   r<   train_trees_setidxxr,   r/   r/   r0   test_read_data_with_treesQ   s    z,TestClassifierData.test_read_data_with_treesc                 C   sL   t t|tjd}t |}tttgdd t	D  }t||ks$J dS )zj
        Converting a dataset to vocab should have a specific set of words along with PAD and UNK
        r   c                 S   s   g | ]}|D ]}|  qqS r/   )lower).0yrL   r/   r/   r0   
<listcomp>c   s    z9TestClassifierData.test_dataset_vocab.<locals>.<listcomp>N)
r   rA   rB   r   rC   dataset_vocabsetr   r   	SENTENCES)rE   r1   r,   vocabexpectedr/   r/   r0   test_dataset_vocab]   s   
z%TestClassifierData.test_dataset_vocabc                 C   s2   t t|tjd}t |}|g dksJ dS )z>
        Test the extraction of labels from a dataset
        r   )r   r   r   N)r   rA   rB   r   rC   dataset_labelsrE   r1   r,   labelsr/   r/   r0   test_dataset_labelsf   s   
z&TestClassifierData.test_dataset_labelsc                 C   sv   t t|tjd}t |}t| ddgksJ t|d t|d ks)J t|d dt| d ks9J dS )zA
        There are two unique lengths in the toy dataset
        r            r   N)	r   rA   rB   r   rC   sort_dataset_by_lenlistkeysrD   )rE   r1   r,   sorted_datasetr/   r/   r0   test_sort_by_lengthn   s
   
$z&TestClassifierData.test_sort_by_lengthc                 C   s   t t|tjd}ttdd tD }t|dksJ t 	|| t
t t 	|dd | W d   dS 1 s>w   Y  dS )zH
        Check that an exception is thrown for an unknown label
        r   c                 S   s   g | ]}|d  qS )r   r/   )rO   rL   r/   r/   r0   rQ   }   s    z8TestClassifierData.test_check_labels.<locals>.<listcomp>N)r   rA   rB   r   rC   sortedrS   r%   rD   check_labelspytestraisesRuntimeErrorrY   r/   r/   r0   test_check_labelsx   s   "z$TestClassifierData.test_check_labelsN)	__name__
__module____qualname__rF   rM   rW   r[   rc   ri   r/   r/   r/   r0   r>   I   s    	
r>   )r(   rf   stanza.models.classifiers.datamodelsclassifiersr   stanza.models.classifiers.utilsr   stanza.models.common.vocabr   r   %stanza.models.constituency.parse_treer   rT   r%   rI   r;   fixturer1   r6   r9   r<   r=   r>   r/   r/   r/   r0   <module>   s<    









