o
    hp                     @   s   d Z ddlZddlmZ ddlmZ ddlmZmZ ddl	m
Z
 ddlmZ ddlT dd	lmZmZ dd
lmZ e ZeedG dd deZdS )z1
Processor for performing part-of-speech tagging
    N)doc)unsort)VOCAB_PREFIXCompositeVocab)Dataset)Trainer)*)UDProcessorregister_processor)get_tqdm)namec                   @   sX   e Zd ZeegZeegZdd Zdd Z	dd Z
dd Zd	d
 Zdd Zdd ZdS )POSProcessorc                 C   sj   d|v r|j |d nd | _|dd |dd d}t| j|d |||j d| _d|v o1|d | _d S )Npretrain_pathforward_charlm_pathbackward_charlm_path)charlm_forward_filecharlm_backward_file
model_path)pretrain
model_filedeviceargsfoundation_cachetqdm)r   load_pretrain	_pretraingetr   r   _trainer_tqdm)selfconfigpipeliner   r    r"   X/var/www/html/env_mimamsha/lib/python3.10/site-packages/stanza/pipeline/pos_processor.py_set_up_model   s   

zPOSProcessor._set_up_modelc                 C   s   d| j d  S )NzPOSProcessor(%s)r   )r    r   r"   r"   r#   __str__#   s   zPOSProcessor.__str__c                 C   st   t | jd tr-t| jd dkr dd | jd d j D S dd | jd j D S dd | jd j D S )	z;
        Returns the xpos tags known by this model
        xpos   c                 S      g | ]}|t vr|qS r"   r   .0kr"   r"   r#   
<listcomp>,       z/POSProcessor.get_known_xpos.<locals>.<listcomp>r   c                 S      i | ]\}}||  t qS r"   keysr   r,   r-   vr"   r"   r#   
<dictcomp>.       z/POSProcessor.get_known_xpos.<locals>.<dictcomp>c                 S   r)   r"   r*   r+   r"   r"   r#   r.   /   r/   )
isinstancevocabr   len_unit2idr2   itemsr%   r"   r"   r#   get_known_xpos&   s
   zPOSProcessor.get_known_xposc                 C   s   t | jd tS )zH
        Returns if the xpos tags are part of a composite vocab
        r'   )r7   r8   r   r%   r"   r"   r#   is_composite_xpos1   s   zPOSProcessor.is_composite_xposc                 C      dd | j d j D }|S )z;
        Returns the upos tags known by this model
        c                 S   r)   r"   r*   r+   r"   r"   r#   r.   ;   r/   z/POSProcessor.get_known_upos.<locals>.<listcomp>upos)r8   r:   r2   )r   r2   r"   r"   r#   get_known_upos7      zPOSProcessor.get_known_uposc                 C   r>   )z:
        Returns the features known by this model
        c                 S   r0   r"   r1   r3   r"   r"   r#   r5   B   r6   z0POSProcessor.get_known_feats.<locals>.<dictcomp>feats)r8   r:   r;   )r   valuesr"   r"   r#   get_known_feats>   rA   zPOSProcessor.get_known_featsc           	      C   s   | j dd}t|| j | j| jddd}t|j| j d |d}g }g }t ' | j	r0t
|}t|D ]\}}||d  || j|7 }q4W d    n1 sRw   Y  t||}|jtjtjtjgdd	 |D  |jS )
Nbatch_maximum_tokensi  T)r8   
evaluationsort_during_eval
batch_size)rH   maximum_tokensc                 S   s   g | ]	}|D ]}|qqS r"   r"   )r,   xyr"   r"   r#   r.   X   s    z(POSProcessor.process.<locals>.<listcomp>)r    r   r   r   r8   iterto_length_limited_loadertorchno_gradr   r   	enumerateextendtrainerpredictr   r   setUPOSXPOSFEATS)	r   documentrI   datasetbatchpredsidxibr"   r"   r#   processE   s&   

$zPOSProcessor.processN)__name__
__module____qualname__rU   POSPROVIDES_DEFAULTTOKENIZEREQUIRES_DEFAULTr$   r&   r<   r=   r@   rD   r`   r"   r"   r"   r#   r      s    

	r   )__doc__rO   stanza.models.commonr   stanza.models.common.utilsr   stanza.models.common.vocabr   r   stanza.models.pos.datar   stanza.models.pos.trainerr   stanza.pipeline._constantsstanza.pipeline.processorr	   r
   stanza.utils.get_tqdmr   r   rd   r   r"   r"   r"   r#   <module>   s    