"""
A trainer class to handle training and testing of models.
"""

import sys
import logging
import torch
from torch import nn

from stanza.models.common.foundation_cache import NoTransformerFoundationCache, load_bert, load_bert_with_peft
from stanza.models.common.peft_config import build_peft_wrapper, load_peft_wrapper
from stanza.models.common.trainer import Trainer as BaseTrainer
from stanza.models.common.vocab import VOCAB_PREFIX, VOCAB_PREFIX_SIZE
from stanza.models.common import utils, loss
from stanza.models.ner.model import NERTagger
from stanza.models.ner.vocab import MultiVocab
from stanza.models.common.crf import viterbi_decode

logger = logging.getLogger('stanza')

def unpack_batch(batch, device):
    """ Unpack a batch from the data loader. """
    inputs = [batch[0]]
    inputs += [b.to(device) if b is not None else None for b in batch[1:5]]
    orig_idx = batch[5]
    word_orig_idx = batch[6]
    char_orig_idx = batch[7]
    sentlens = batch[8]
    wordlens = batch[9]
    charlens = batch[10]
    charoffsets = batch[11]
    return inputs, orig_idx, word_orig_idx, char_orig_idx, sentlens, wordlens, charlens, charoffsets

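# Note on the expected layout (an assumption for illustration; the actual batches
# are built by the NER data loader elsewhere in stanza.models.ner): batch[0] stays
# on the CPU, batch[1:5] are padded tensors moved to `device`, and the remaining
# entries carry the bookkeeping needed to restore the original sentence order.
#
#   inputs, orig_idx, *_ = unpack_batch(batch, device)
#   word, wordchars, wordchars_mask, chars, tags = inputs
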
r-   c                 C   s  t | }t|D ]f\}}|dr?|t|d ks5||d  d|dd  kr?||d  d|dd  kr?d|dd  ||< |drn|dksd||d  d|dd  krn||d  d|dd  krnd|dd  ||< qt|D ]f\}}|dr|t|d ks||d  d|dd  kr||d  d|dd  krd|dd  ||< |dr|dks||d  d|dd  kr||d  d|dd  krd|dd  ||< qs|S )	zF
    If there are any singleton B- or E- tags, convert them to S-
    I-r      NE-r   B-S-)list	enumerate
startswithlen)tagsnew_tagsidxtagr   r   r   fix_singleton_tags#   s,   
8
8
8
8r;   c                   @   sN   e Zd ZdZ		dddZdddZdd	d
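# A small worked example of the conversion above (inputs invented for illustration):
# a B- tag with no continuation and an E- tag with no matching start are both
# rewritten as singleton S- tags, while a well formed B-/E- pair is left alone.
#
#   fix_singleton_tags(["B-PER", "O", "E-LOC", "B-ORG", "E-ORG"])
#   => ["S-PER", "O", "S-LOC", "B-ORG", "E-ORG"]
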
ZdddZdddZdd Z	dS )r   z  A trainer for training models. NFc	              
   C   s  |d ur|  |||| n]tdd |||fD sJ || _|| _t| jd \}	}
d }| jd rAd| jd< d}t|	| jt|d}	t|||j||	|
| jd |d	| _	| j
d
drj| j
ddrj| j	jjddid | j
dd d u rxd| jd< |rtd ddg}| j	 D ]\}}|dd |vrd|_q| j	|| _	|stj| jd | j	| jd | jd | j
dd| j
dd| _d S tj| jd | j	| jd | jd | j
dd| j
dd| _d S )Nc                 s   s    | ]}|d uV  qd S r   r   )r   varr   r   r   	<genexpr>G   s    z#Trainer.__init__.<locals>.<genexpr>
bert_modeluse_peftTbert_finetuneneradapter_name
emb_matrixfoundation_cacher>   bert_tokenizerforce_bert_saved	peft_namegradient_checkpointingFuse_reentrant)gradient_checkpointing_kwargspredict_tagsetr   z,Disabling gradient for non-classifier layerstag_clfcrit.optimlrmomentumbert_learning_rateg        )rS   rT   is_peftsecond_optim	second_lrsecond_bert_learning_rate)loadallargsvocabr   r   loggerr   embmodelgetr>   gradient_checkpointing_enableinfonamed_parameterssplitrequires_gradr   r   get_optimizer	optimizer)selfr[   r\   pretrain
model_filer   train_classifier_onlyrF   rV   r>   rG   rI   excludepnamepr   r   r   __init__A   s6   

"

BBzTrainer.__init__c                 C   s   t | j j}t||\}}}}}}	}
}|\}}}}}|r$| j  n
| j  | j  | |||||||	|||
|\}}}|j	
 }|rI|S |  tjj| j | jd  | j  |S )Nmax_grad_norm)nextr_   
parametersr   r-   evaltrainrg   	zero_graddataitembackwardtorchr   r   clip_grad_norm_r[   step)rh   r$   rs   r   r%   r&   r'   r(   r)   r*   r+   r,   word	wordcharswordchars_maskcharsr7   r   _loss_valr   r   r   updatep   s   

$

zTrainer.updateTc                    sF  t | j j}t||\}}}}}}	}
|\}}}}}| j  | ||||||||
|	|\}}}dd |D }dd |D }|d jd  t fdd|D rWtdg }| j	d t
 D ]6fd	dt||D }d
d |D }tt| }| jd |}fdd|D }t|}||g7 }qb|rt||}|S )Nc                 S      g | ]	}|j   qS r   rv   cpunumpyr   xr   r   r   r          z#Trainer.predict.<locals>.<listcomp>c                 S   r   r   r   r   r   r   r   r      r   r   c                 3   s    | ]
}|j d   kV  qdS )r   N)shaper   )
batch_sizer   r   r=      s    z"Trainer.predict.<locals>.<genexpr>z0Expected all of the logits to have the same sizerM   c                    s.   g | ]\}}t | d   f |d qS )Nr   r   )r   r   y)ir)   r   r   r      s   . c                 S   s   g | ]	}d d |D qS )c                 S   s   g | ]
}|t kr
|nt qS r   )r
   r   r   r   r   r      s    z.Trainer.predict.<locals>.<listcomp>.<listcomp>r   )r   r   r   r   r   r      r   r:   c                    s"   g | ]}t |tr|  n|qS r   )
isinstancer3   r   )rM   r   r   r      r   )rq   r_   rr   r   r-   rs   r   anyAssertionErrorr[   rangezipr3   r\   unmapr;   r   unsort)rh   r$   r   r   r%   r&   r'   r(   r*   r+   r,   r|   r}   r~   r   r7   r   logitstranstag_seqsr   )r   r   rM   r)   r   predict   s.   
$
zTrainer.predictc              	      s    j  }|r fdd| D }|D ]}||= q| j  jd} jd r;ddlm} | j j j jd|d< zt	j
    def save(self, filename, skip_modules=True):
        model_state = self.model.state_dict()
        if skip_modules:
            # skip saving modules such as the pretrained embeddings, which are not updated
            skipped = [k for k in model_state.keys() if k.split('.')[0] in self.model.unsaved_modules]
            for k in skipped:
                del model_state[k]
        params = {
            'model': model_state,
            'vocab': self.vocab.state_dict(),
            'config': self.args
        }
        if self.args['use_peft']:
            # save only the lora adapter weights rather than the full transformer
            from peft import get_peft_model_state_dict
            params['bert_lora'] = get_peft_model_state_dict(self.model.bert_model, adapter_name=self.model.peft_name)
        try:
            torch.save(params, filename, _use_new_zipfile_serialization=False)
            logger.info("Model saved to {}".format(filename))
        except (KeyboardInterrupt, SystemExit):
            raise
        except BaseException:
            logger.warning("Saving failed... continuing anyway.")

    def load(self, filename, pretrain=None, args=None, foundation_cache=None):
        try:
            checkpoint = torch.load(filename, lambda storage, loc: storage, weights_only=True)
        except BaseException:
            logger.error("Cannot load model from {}".format(filename))
            raise
        self.args = checkpoint['config']
        if args:
            self.args.update(args)
        # make sure these config keys exist, even for older models which did not have them
        for keep_arg in ('predict_tagset', 'train_scheme', 'scheme'):
            self.args[keep_arg] = checkpoint['config'].get(keep_arg, None)

        lora_weights = checkpoint.get('bert_lora')
        if lora_weights:
            logger.debug("Found peft weights for NER; loading a peft adapter")
            self.args['use_peft'] = True

        self.vocab = MultiVocab.load_state_dict(checkpoint['vocab'])
        emb_matrix = None
        if pretrain is not None:
            emb_matrix = pretrain.emb

        force_bert_saved = False
        peft_name = None
        if self.args.get('use_peft', False):
            force_bert_saved = True
            bert_model, bert_tokenizer, peft_name = load_bert_with_peft(self.args['bert_model'], "ner", foundation_cache)
            bert_model = load_peft_wrapper(bert_model, lora_weights, self.args, logger, peft_name)
            logger.debug("Loaded peft with name %s", peft_name)
        else:
            if any(x.startswith("bert_model.") for x in checkpoint['model'].keys()):
                logger.debug("Model %s has a finetuned transformer.  Not using transformer cache to make sure the finetuned version of the transformer isn't accidentally used elsewhere", filename)
                foundation_cache = NoTransformerFoundationCache(foundation_cache)
                force_bert_saved = True
            bert_model, bert_tokenizer = load_bert(self.args['bert_model'], foundation_cache)

        if any(x.startswith("crit.") for x in checkpoint['model'].keys()):
            logger.debug("Old model format detected.  Updating to the new format with one column of tags")
            checkpoint['model']['crits.0._transitions'] = checkpoint['model'].pop('crit._transitions')
            checkpoint['model']['tag_clfs.0.weight'] = checkpoint['model'].pop('tag_clf.weight')
            checkpoint['model']['tag_clfs.0.bias'] = checkpoint['model'].pop('tag_clf.bias')

        self.model = NERTagger(self.args, self.vocab, emb_matrix=emb_matrix,
                               bert_model=bert_model, bert_tokenizer=bert_tokenizer,
                               force_bert_saved=force_bert_saved, peft_name=peft_name)
        self.model.load_state_dict(checkpoint['model'], strict=False)

        if 'delta' not in self.model.unsaved_modules and 'word_emb.weight' in checkpoint['model'].keys() and 'word_emb' in self.model.unsaved_modules:
            logger.debug("Removing word_emb from unsaved_modules so that resaving %s will keep the saved embedding", filename)
            self.model.unsaved_modules.remove('word_emb')

    def get_known_tags(self):
        """
        Return the tags known by this model

        Removes the S-, B-, etc, and does not include O
        """
        tags = set()
        for tag in self.vocab['tag']:
            if tag in VOCAB_PREFIX:
                continue
            if tag == 'O':
                continue
            if len(tag) > 2 and tag[:2] in ('S-', 'B-', 'I-', 'E-'):
                tag = tag[2:]
            tags.add(tag)
        return sorted(tags)

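# Rough usage sketch (assumed, not part of the original module): the NER training
# and tagging scripts drive this class roughly as follows, where `args`, `vocab`,
# `pretrain`, and the batches come from stanza.models.ner_tagger and the NER data
# loader.
#
#   trainer = Trainer(args=args, vocab=vocab, pretrain=pretrain, device=args['device'])
#   for batch in train_batches:
#       train_loss = trainer.update(batch)            # backprop + optimizer step
#   dev_loss = trainer.update(dev_batch, eval=True)   # loss only, no parameter update
#   tag_seqs = trainer.predict(dev_batch)             # BIOES tag strings per sentence
#   trainer.save(model_file)
#
#   # reload for inference, optionally reusing a foundation cache for the transformer
#   trainer = Trainer(args=args, model_file=model_file, pretrain=pretrain,
#                     foundation_cache=foundation_cache)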
:r   )r   sysloggingry   r   %stanza.models.common.foundation_cacher   r   r    stanza.models.common.peft_configr   r   stanza.models.common.trainerr   BaseTrainerstanza.models.common.vocabr	   r
   stanza.models.commonr   r   stanza.models.ner.modelr   stanza.models.ner.vocabr   stanza.models.common.crfr   	getLoggerr]   r-   r;   r   r   r   r   <module>   s"    
