o
    h                     @   s~   d Z ddlmZ ddlZddlmZ ddlmZ ddlm	Z	 ddl
T ddlmZmZ ejejgZeed	G d
d deZdS )z(
Processor for performing lemmatization
    )compressN)doc)
DataLoader)Trainer)*)UDProcessorregister_processor)namec                       sX   e Zd ZeegZeegZdZ fddZ	e
dd Zdd Zdd	 Zd
d Z  ZS )LemmaProcessori  c                    s    d | _ d | _t ||| d S N)_use_identity
_pretaggedsuper__init__)selfconfigpipelinedevice	__class__ Z/var/www/html/env_mimamsha/lib/python3.10/site-packages/stanza/pipeline/lemma_processor.pyr      s   zLemmaProcessor.__init__c                 C   s   | j S r   )r   r   r   r   r   use_identity"   s   zLemmaProcessor.use_identityc                 C   s   | ddv rd| _|| _tj| jd< d S | dd| _d| _| dd | dd d	}t|}| d
d |d< t||d ||j	|d| _
d S )Nr   )TrueTT
batch_sizestore_resultsFforward_charlm_pathbackward_charlm_path)charlm_forward_filecharlm_backward_filepretrain_pathwordvec_pretrain_file
model_path)args
model_filer   foundation_cachelemma_classifier_args)getr   _configr
   DEFAULT_BATCH_SIZEr   r   dictr   r&   _trainer)r   r   r   r   r$   r'   r   r   r   _set_up_model&   s   


zLemmaProcessor._set_up_modelc                 C   sX   | j dd | _| jrt | _d S | jdr&| js&tj	tt
g| _d S tj| _d S )N	pretaggedpos)r)   r(   r   set	_requiresr   r   r
   REQUIRES_DEFAULTunionPOSr   r   r   r   _set_up_requires<   s   zLemmaProcessor._set_up_requiresc              	   C   s~  | j st|| jd | j| jddd}nt|| jd | jddd}| j r-dd |jjD }n| jddrC| j|jtj	tj
g}n| jd	dri| j|jtj	tj
g}t|| jd | j| jd|dd
}n|}t / g }g }t|D ]\}}| j|| jd |j\}	}
||	7 }|
d ur||
7 }qxW d    n1 sw   Y  | jd	dr|jt}dd |D }| jjdd t||D ||d}| jrt|tdd |}dd t||D }| jj|dd d}g }|D ]}|r|d q|||  |d7 }q| j||}n| jj|jtj	g||d}| j r,| j|j|}dd |D }|jtjg| |jS )Nr   T)vocab
evaluationexpand_unk_vocab)r7   
conll_onlyc                 S   s   g | ]}|j D ]}|jqqS r   )wordstext).0sentwordr   r   r   
<listcomp>K   s    z*LemmaProcessor.process.<locals>.<listcomp>	dict_onlyFensemble_dict)r6   r7   skipr8   	beam_sizec                 S   s   g | ]}|d  qS )r   r   r<   xr   r   r   r?   e   s    c                 S   s   g | ]\}}|s|qS r   r   r<   rE   yr   r   r   r?   f   s    )editsc                 S   s   |  S r   r   )rE   r   r   r   <lambda>h   s    z(LemmaProcessor.process.<locals>.<lambda>c                 S   s"   g | ]\}}|d  |d |fqS )r      r   rF   r   r   r   r?   i   s   " )update_word_dictr    rJ   c                 S   s$   g | ]}t t||fd gd qS ))r   _rJ   )maxlenrD   r   r   r   r?   |   s   $ )r   r   r   r6   r   	sentencesr(   trainerpredict_dictTEXTUPOSskip_seq2seqtorchno_grad	enumeratepredict	WORD_TAGSpostprocesszipr   r   map
train_dictappendensemblehas_contextual_lemmatizersupdate_contextual_predsr0   LEMMA)r   documentbatchpredsrB   seq2seq_batchrH   ibpses	word_tagsr:   new_word_tagsnew_predictionspreds1sr   r   r   processE   s\    
	"
zLemmaProcessor.process)__name__
__module____qualname__r0   rc   PROVIDES_DEFAULTTOKENIZEr2   r*   r   propertyr   r-   r5   rq   __classcell__r   r   r   r   r
      s    


	r
   )__doc__	itertoolsr   rV   stanza.models.commonr   stanza.models.lemma.datar   stanza.models.lemma.trainerr   stanza.pipeline._constantsstanza.pipeline.processorr   r   rS   rT   rZ   rc   r
   r   r   r   r   <module>   s    