o
    hC2                     @   s   d Z ddlZddlZddlZddlZddlmZ zddlZW n	 ey'   Y nw ddlm	Z
 ddlmZmZ ddlmZmZmZ ddlmZ ddlmZmZ dd	lmZ dd
lmZ edZdd ZG dd de
Z	dS )z;
A trainer class to handle training and testing of models.
    N)nn)Trainer)utilsloss)	load_bertload_bert_with_peftNoTransformerFoundationCachechuliu_edmonds_one_root)build_peft_wrapperload_peft_wrapper)Parser)
MultiVocabstanzac                    sR    fdd| dd D }| d }| d }| d }| d }| d }||||||fS )	z& Unpack a batch from the data loader. c                    s"   g | ]}|d ur|  nd qS N)to).0bdevice Y/var/www/html/env_mimamsha/lib/python3.10/site-packages/stanza/models/depparse/trainer.py
<listcomp>   s   " z unpack_batch.<locals>.<listcomp>N               r   )batchr   inputsorig_idxword_orig_idxsentlenswordlenstextr   r   r   unpack_batch   s   r%   c                   @   sN   e Zd ZdZ		dddZdd Zddd	ZdddZdddZdddZ	dS )r   z  A trainer for training models. NFc	              
   C   s  d| _ d| _g | _t|}	|d ur'| ||||| |r&d| _ d| _g | _nH|| _|| _t| jd \}
}d }| jd rMd| jd< d}t	|
| jt
|d}
t|||d urW|jnd ||
|| jd |d| _| j|| _|   |rt|	| _| jd	rdd l}|j| jd
ddd d S d S )Nr   
bert_modeluse_peftTbert_finetunedepparseadapter_name
emb_matrixfoundation_cacher&   bert_tokenizerforce_bert_saved	peft_namewandb   all)log_freqlog	log_graph)global_steplast_best_stepdev_score_historycopydeepcopyloadargsvocabr   r   loggerr   embmodelr   _Trainer__init_optimgetr2   watch)selfr>   r?   pretrain
model_filer   r.   ignore_model_configreset_history	orig_argsr&   r/   r1   r2   r   r   r   __init__&   s8   


.zTrainer.__init__c                 C   s  | j ddr7| j dr7tj| j d | j| j d d| j d fd| j dd	| j d
d| j dd d| _n5tj| j d | j| j d d| j d fd| j dd	| j dd | j dd	| j d
d| j dd d
| _i | _| j ddr| j dr| j dd r| j D ]\}}|d }t	|| j d }|| j|< qd S d S d| jv rt
jjj| jd d| j d d}t	| jd | j d }t
jjj| jd ||g| j d gd| jd< d S d S )Nsecond_stageFsecond_optim	second_lrg?beta2gư>second_bert_learning_rateg        r'   bert_finetune_layers)betasepsbert_learning_rateis_peftrR   optimlrrU   weight_decaybert_weight_decay)rS   rT   rU   rY   rZ   rV   rR   second_warmup_steps
_schedulerbert_optimizerr   bert_start_finetuning)factortotal_itersbert_warmup_steps)
schedulers
milestonesbert_scheduler)r>   rD   r   get_split_optimizerrB   	optimizer	scheduleritemstransformers!get_constant_schedule_with_warmuptorchrW   lr_scheduler
ConstantLRSequentialLR)rF   namerf   warmup_schedulerzero_schedulerr   r   r   __init_optimT   sH   

 
zTrainer.__init_optimc                 C   s  t | j j}t||\}}}}}}	|\}
}}}}}}}}}}|r(| j  n| j  | j D ]}|	  q2| |
||||||||||||||	\}}|j
 }|rW|S |  tjj| j | jd  | j D ]}|  qn| j D ]}|  qz|S )Nmax_grad_norm)nextrB   
parametersr   r%   evaltrainrf   values	zero_graddataitembackwardrk   r   r   clip_grad_norm_r>   steprg   )rF   r   rv   r   r   r    r!   r"   r#   r$   word	word_mask	wordcharswordchars_maskuposxposufeats
pretrainedlemmaheaddepreloptr   _loss_valrg   r   r   r   updatev   s&   

*


zTrainer.updateTc                    s   t j j}t||\}}}}}|\}	}
}}}}}}}}}j  |	d}|	|
||||||||||||\}dd td D fddtD   fddt	|D }|rlt
||}|S )Nr   c                 S   s0   g | ]\}}t |d |d |f dd  qS )N   r	   )r   adjlr   r   r   r      s   0 z#Trainer.predict.<locals>.<listcomp>c                    s4   g | ]\ }j d   fddt|D qS )r   c                    s(   g | ]\}}d    |d   | qS )r   r   )r   jh)ipredsr   r   r         ( .Trainer.predict.<locals>.<listcomp>.<listcomp>)r?   unmap	enumerate)r   hs)r   rF   r   r   r      s   4 c                    s.   g | ]  fd dt   d D qS )c                    s(   g | ]}t  |   | gqS r   )str)r   r   )deprel_seqs	head_seqsr   r   r   r      r   r   r   )range)r   )r   r   r"   r   r   r      s   . )rt   rB   ru   r   r%   rv   sizezipr   r   r   unsort)rF   r   r   r   r   r    r!   r#   r$   r   r   r   r   r   r   r   r   r   r   r   
batch_sizer   pred_tokensr   )r   r   r   rF   r"   r   predict   s   

*zTrainer.predictc           	         s
   j  }|r fdd| D }|D ]}||= q| j  j j j jd} jddrCddl	m
} | j j j jd|d	< |rb jd urbd
d  j D |d< dd  j D |d< ztj||dd td| W d S  ty   td Y d S w )Nc                    s&   g | ]}| d d  jjv r|qS ).r   )splitrB   unsaved_modules)r   krF   r   r   r      s   & z Trainer.save.<locals>.<listcomp>)rB   r?   configr8   r9   r:   r'   Fr   )get_peft_model_state_dictr*   	bert_lorac                 S      i | ]	\}}||  qS r   
state_dict)r   r   r   r   r   r   
<dictcomp>       z Trainer.save.<locals>.<dictcomp>optimizer_state_dictc                 S   r   r   r   )r   r   rg   r   r   r   r      r   scheduler_state_dict)_use_new_zipfile_serializationzModel saved to {}z#Saving failed... continuing anyway.)rB   r   keysr?   r>   r8   r9   r:   rD   peftr   r&   r1   rf   rh   rg   rk   saver@   infoformatBaseExceptionwarning)	rF   filenameskip_modulessave_optimizermodel_stateskippedr   paramsr   r   r   r   r      s0   
zTrainer.savec              
   C   s6  zt j|dd dd}W n ty   td|  w |d | _|dur,| j| d| jvr6d| jd< |d	}|rGt	d
 d| jd< t
|d | _d}| jd r]|dur]|j}d}	d}
| jddrd}	t| jd d|\}}}
t||| jt|
}t	d|
 n$tdd |d  D rt	d| t|}d}	t| jd|\}}t| j| j|||||	|
d| _| jj|d dd |dur| j|| _|   |d}|r| D ]\}}| j| | q|d}|r| D ]\}}| j| | q|dd| _|dd| _|dt | _dS )z
        Load a model from file, with preloaded pretrain embeddings. Here we allow the pretrain to be None or a dummy input,
        and the actual use of pretrain embeddings will depend on the boolean config "pretrain" in the loaded args.
        c                 S   s   | S r   r   )storagelocr   r   r   <lambda>   s    zTrainer.load.<locals>.<lambda>T)weights_onlyzCannot load model from {}r   Nr&   r   z7Found peft weights for depparse; loading a peft adapterr'   r?   rG   Fr)   zLoaded peft with name %sc                 s   s    | ]}| d V  qdS )zbert_model.N)
startswith)r   xr   r   r   	<genexpr>   s    zTrainer.load.<locals>.<genexpr>rB   zModel %s has a finetuned transformer.  Not using transformer cache to make sure the finetuned version of the transformer isn't accidentally used elsewherer,   )strictr   r   r8   r   r9   r:   )rk   r=   r   r@   errorr   r>   r   rD   debugr   load_state_dictr?   rA   r   r   anyr   r   r   r   rB   r   rC   rh   rf   rg   r8   r9   listr:   )rF   r   rG   r>   r.   r   
checkpointlora_weightsr-   r0   r1   r&   r/   optim_state_dictr   stater   r   r   r   r=      s\   







zTrainer.load)NNNNNNFF)F)T)TF)NNN)
__name__
__module____qualname____doc__rL   rC   r   r   r   r=   r   r   r   r   r   $   s    
.
"

r   )r   r;   sysloggingrk   r   ri   ImportErrorstanza.models.common.trainerr   BaseTrainerstanza.models.commonr   r   %stanza.models.common.foundation_cacher   r   r   #stanza.models.common.chuliu_edmondsr
    stanza.models.common.peft_configr   r   stanza.models.depparse.modelr   stanza.models.pos.vocabr   	getLoggerr@   r%   r   r   r   r   <module>   s*    

