o
    hI3                     @   s   d Z ddlZddlZddlZddlmZ ddlZddlZddlm	Z	 ddl
m	  mZ ddlm  m  mZ ddlmZmZ ddlmZ ddlmZ ddlmZ dd	lmZmZ dd
lmZ ddlm Z  ddl!m"Z" e#dZ$dd Z%G dd de&Z'dS )z;
A trainer class to handle training and testing of models.
    N)Counter)nn)TEXTUPOS)load_charlm)Seq2SeqModel)!CharacterLanguageModelWordAdapter)utilsloss)edit)
MultiVocab)LemmaClassifierstanzac                    s4    fdd| dd D }| d }| d }|||fS )z& Unpack a batch from the data loader. c                    s"   g | ]}|d ur|  nd qS N)to).0bdevice V/var/www/html/env_mimamsha/lib/python3.10/site-packages/stanza/models/lemma/trainer.py
<listcomp>      " z unpack_batch.<locals>.<listcomp>N      r   )batchr   inputsorig_idxtextr   r   r   unpack_batch   s   
r   c                   @   s   e Zd ZdZd$ddZdd Zd%dd	Zd&ddZd'ddZdd Z	dd Z
dd Zdd Zd(ddZdd Zdd Zdd Zd(d d!Zd'd"d#ZdS ))Trainerz  A trainer for training models. Nc                 C   s   |d ur|  |||| n!|| _|d rd | _n| |||| _|| _t | _t | _g | _| j	dd| _
| jd s~| j|| _| j	ddr`t| jd j| jd || _td nt| jd j|| _t| jd | j| jd	 | _d S d S )
N	dict_onlycaselessFr   charalphaz2Running seq2seq lemmatizer with edit classifier...optimlr)loadargsmodelbuild_seq2seqvocabdict	word_dictcomposite_dictcontextual_lemmatizersgetr"   r   r
   MixLosssizecritloggerdebugSequenceLossr	   get_optimizer	optimizer)selfr(   r+   
emb_matrix
model_filer   foundation_cachelemma_classifier_argsr   r   r   __init__#   s&   
""zTrainer.__init__c           	      C   s   d }g }|d ur| dd rt|d |d}|| |d ur2| dd r2t|d |d}|| t|dkrAt|}t|}t|||d}|S )Ncharlm_forward_file)r<   charlm_backward_filer   )r:   contextual_embedding)r0   r   appendlenr   
ModuleListr   r   )	r9   r(   r:   r<   	charmodelcharlmscharmodel_forwardcharmodel_backwardr)   r   r   r   r*   ?   s   


zTrainer.build_seq2seqFc                 C   s  t | j j}t||\}}}|\}}}	}
}}|r | j  n
| j  | j  | j|||	||d\}}| j	
ddrW|d usCJ | |d| jd j|
d||}n| |d| jd j|
d}|j }|rq|S |  tjj| j | j	d  | j  |S )N)rawr   Fr#   max_grad_norm)nextr)   
parametersr   r   evaltrainr8   	zero_gradr(   r0   r3   viewr+   r2   dataitembackwardtorchr   r	   clip_grad_norm_step)r9   r   rN   r   r   r   r   srcsrc_masktgt_intgt_outposedits	log_probsedit_logitsr
   loss_valr   r   r   updateN   s*   

"

zTrainer.update   c                    s    d u r| j  t| j j}t||\}}}|\}}	}
}}}| j  |d}| jj||	|||d\}} fdd|D }t	
|}dd |D }t	||}| jddrz|d us^J tj|j  dd	|g }t	||}||fS d }||fS )
Nr   )r\   	beam_sizerI   c                    s   g | ]	} d   |qS )r#   )unmap)r   idsr+   r   r   r   s       z#Trainer.predict.<locals>.<listcomp>c                 S   s   g | ]}d  |qS ) )join)r   seqr   r   r   r   u   s    r   Frb   )axis)r+   rL   r)   rM   r   r   rN   r2   predictr	   prune_decoded_seqsunsortr(   r0   npargmaxrR   cpunumpyreshapetolist)r9   r   rc   r+   r   r   r   r   rX   rY   tgttgt_maskr\   r]   
batch_sizepredsr_   	pred_seqspred_tokensr   rf   r   rl   h   s&   


$zTrainer.predictc           
      C   s   t |t |ksJ dg }| jddr<|dur!t |t |ks#J t|||D ]\}}}t|||}||g7 }q)n|}t |t |ksHJ g }	t||D ]\}}t |dks^tj|v rd|	|g7 }	qO|	|g7 }	qO|	S )z( Postprocess, mainly for handing edits. z1Lemma predictions must have same length as words.r   FNr   )rC   r(   r0   zipr   	edit_wordconstantUNK)
r9   wordsrx   r]   editedwpelemfinalr   r   r   postprocess   s    zTrainer.postprocessc                 C   s   | j d uot| j dkS Nr   )r/   rC   r9   r   r   r   has_contextual_lemmatizers   s   z"Trainer.has_contextual_lemmatizersc                 C   s   t | jdkr	|S t| jD ]W}g }g }g }g }tt||D ]%\}	\}
}||
|}|D ]}|| ||
 || ||	 q-qt |dkrLq||||}t|||D ]\}	}}|||	 |< qYq|S r   )rC   r/   reversed	enumerater{   target_indicesrB   rl   )r9   sentence_wordssentence_tagsrx   
contextualpred_idxpred_sent_wordspred_sent_tagspred_sent_idssent_idr   tagsindicesidxcontextual_predictionsword_idpredr   r   r   predict_contextual   s,   


zTrainer.predict_contextualc           	      C   s   t | jdkr	|S |jtgdd}|jtgdd}g }d}|D ]}|t | }||||  |t |7 }q| |||}dd |D }|S )a/  
        Update a flat list of preds with the output of the contextual lemmatizers

        - First, it unflattens the preds based on the lengths of the sentences
        - Then it uses the contextual lemmatizers
        - Finally, it reflattens the preds into the format expected by the caller
        r   T)as_sentencesc                 S   s   g | ]	}|D ]}|qqS r   r   )r   sentencelemmar   r   r   r      rg   z3Trainer.update_contextual_preds.<locals>.<listcomp>)rC   r/   r0   r   r   rB   r   )	r9   docrx   r   r   sentence_predsstart_indexsent	end_indexr   r   r   update_contextual_preds   s   zTrainer.update_contextual_predsc                 C   s   t | j| d S r   )r	   	change_lrr8   )r9   new_lrr   r   r   	update_lr   s   zTrainer.update_lrTc           	      C   sn   t  }|dd |D  | D ]#\}}|\}}}||f| jvr(|| j||f< |r4|| jvr4|| j|< qdS )a  
        Train a dict lemmatizer given training (word, pos, lemma) triples.

        Can update only the composite_dict (word/pos) in situations where
        the data might be limited from the tags, such as when adding more
        words at pipeline time
        c                 S   s"   g | ]}|d  |d |d fqS )r   rb      r   )r   r   r   r   r   r      r   z&Trainer.train_dict.<locals>.<listcomp>N)r   ra   most_commonr.   r-   )	r9   triplesupdate_word_dictctrr   _r   r\   lr   r   r   
train_dict   s   	

zTrainer.train_dictc                 C   sr   g }|D ]2}|\}}| j r| }||f| jv r#|| j||f g7 }q|| jv r1|| j| g7 }q||g7 }q|S )zH Predict a list of lemmas using the dict model given (word, pos) pairs. )r"   lowerr.   r-   )r9   pairslemmasr   r   r\   r   r   r   predict_dict   s   
zTrainer.predict_dictc                 C   sb   g }|D ]*}|\}}| j r| }||f| jv r|d q|| jv r)|d q|d q|S )zY Determine if we can skip the seq2seq module when ensembling with the frequency lexicon. TF)r"   r   r.   rB   r-   )r9   r   skipr   r   r\   r   r   r   skip_seq2seq   s   
zTrainer.skip_seq2seqc           	      C   s   g }t |t |ksJ t||D ]6\}}|\}}| jr | }||f| jv r/| j||f }n|| jv r:| j| }n|}|du rB|}|| q|S )z7 Ensemble the dict with statistical model predictions. N)rC   r{   r"   r   r.   r-   rB   )	r9   r   other_predsr   r   r   r   r\   r   r   r   r   ensemble   s   
zTrainer.ensemblec           	         s   d } j d ur! j  }|r! fdd| D }|D ]}||= q| j jf j  jg d} jD ]}|d |	  q4t
j|d }|rWt
jt
j|d dd tj||dd	 td
| d S )Nc                    s&   g | ]}| d d  jjv r|qS ).r   )splitr)   unsaved_modules)r   kr   r   r   r     s   & z Trainer.save.<locals>.<listcomp>)r)   dictsr+   configr   r   r   T)exist_okF)_use_new_zipfile_serializationzModel saved to {})r)   
state_dictkeysr-   r.   r+   r(   r/   rB   get_save_dictospathr   makedirsrU   saver4   infoformat)	r9   filenameskip_modulesmodel_stateskippedr   paramsr   save_dirr   r   r   r     s(   



zTrainer.savec                 C   s   zt j|dd dd}W n ty   td|  w |d | _|d ur4|d | jd< |d | jd< |d	 \| _| _| jd
 sU| 	| jd || _
| j
j|d dd nd | _
t|d | _g | _|dg D ]}| jtj||d qid S )Nc                 S   s   | S r   r   )storagelocr   r   r   <lambda>)  s    zTrainer.load.<locals>.<lambda>T)weights_onlyzCannot load model from {}r   r?   r@   r   r!   r)   F)strictr+   r   )r(   )rU   r'   BaseExceptionr4   errorr   r(   r-   r.   r*   r)   load_state_dictr   r+   r/   r0   rB   r   from_checkpoint)r9   r   r(   r<   r=   
checkpointr   r   r   r   r'   '  s(   

zTrainer.load)NNNNNNN)F)rb   Nr   )T)__name__
__module____qualname____doc__r>   r*   ra   rl   r   r   r   r   r   r   r   r   r   r   r'   r   r   r   r   r    !   s"    





r    )(r   r   sysrr   ro   collectionsr   loggingrU   r   torch.nn.initinit%stanza.models.common.seq2seq_constantmodelscommonseq2seq_constantr}   stanza.models.common.docr   r   %stanza.models.common.foundation_cacher   "stanza.models.common.seq2seq_modelr   stanza.models.common.char_modelr   stanza.models.commonr	   r
   stanza.models.lemmar   stanza.models.lemma.vocabr   )stanza.models.lemma_classifier.base_modelr   	getLoggerr4   r   objectr    r   r   r   r   <module>   s*    
