o
    –hR  ã                   @   sx   d Z ddlZddlZddlmZ ddlmZ G dd„ dƒZedkr:edd	d
gƒZ	ej
 ej
 e¡d¡Ze	 e¡ dS dS )zß
Baseline model for the existing lemmatizer which always predicts "be" and never "have" on the "'s" token.

The BaselineModel class can be updated to any arbitrary token and predicton lemma, not just "be" on the "s" token.
é    N)Úevaluate_sequences)Úload_doc_from_conll_filec                   @   s$   e Zd Zdd„ Zdd„ Zdd„ ZdS )ÚBaselineModelc                 C   s   || _ || _|| _d S ©N)Útoken_to_lemmatizeÚprediction_lemmaÚprediction_upos)Úselfr   r   r   © r
   úh/var/www/html/env_mimamsha/lib/python3.10/site-packages/stanza/models/lemma_classifier/baseline_model.pyÚ__init__   s   
zBaselineModel.__init__c                 C   s   || j kr| jS d S r   )r   r   )r	   Útokenr
   r
   r   Úpredict   s   
ÿzBaselineModel.predictc                 C   s˜   t |ƒ}g g }}|jD ]3}g g }}|jD ]}|j| jv r4|j| jkr4| j}	|j}
| 	|
¡ | 	|	¡ q| 	|¡ | 	|¡ qt
||ƒ\}}}||fS )aQ  
        Evaluates the baseline model against the test set defined in conll_path.

        Returns a map where the keys are each class and the values are another map including the precision, recall and f1 scores
        for that class.

        Also returns confusion matrix. Keys are gold tags and inner keys are predicted tags
        )r   Ú	sentencesÚwordsÚuposr   Útextr   r   ÚlemmaÚappendr   )r	   Ú
conll_pathÚdocÚgold_tag_sequencesÚpred_tag_sequencesÚsentenceÚ	gold_tagsÚ	pred_tagsÚwordÚpredÚgoldÚmulticlass_resultÚconfusion_mtxÚweighted_f1r
   r
   r   Úevaluate   s   	





€
zBaselineModel.evaluateN)Ú__name__Ú
__module__Ú__qualname__r   r   r"   r
   r
   r
   r   r      s    r   Ú__main__z'sÚbeÚAUXzen_gum-ud-train.conllu)Ú__doc__ÚstanzaÚosÚ.stanza.models.lemma_classifier.evaluate_modelsr   Ú.stanza.models.lemma_classifier.prepare_datasetr   r   r#   Úbl_modelÚpathÚjoinÚdirnameÚ__file__Ú
coNLL_pathr"   r
   r
   r
   r   Ú<module>   s    %ü