# stanza/models/constituency/parser_training.py
#
# Recovered from a compiled (.pyc) dump: the names, docstrings, and log strings below come
# straight from the readable constants; function bodies marked as sketches or outlines are
# reconstructions, not the verbatim upstream source.

from collections import Counter, namedtuple

import copy
import logging
import os
import random
import re

import torch
from torch import nn

from stanza.models.common import utils
from stanza.models.common.foundation_cache import FoundationCache, NoTransformerFoundationCache
from stanza.models.common.large_margin_loss import LargeMarginInSoftmaxLoss
from stanza.models.common.utils import sort_with_indices, unsort
from stanza.models.constituency import parse_transitions
from stanza.models.constituency import transition_sequence
from stanza.models.constituency import tree_reader
from stanza.models.constituency.in_order_compound_oracle import InOrderCompoundOracle
from stanza.models.constituency.in_order_oracle import InOrderOracle
from stanza.models.constituency.lstm_model import LSTMModel
from stanza.models.constituency.parse_transitions import TransitionScheme
from stanza.models.constituency.parse_tree import Tree
from stanza.models.constituency.top_down_oracle import TopDownOracle
from stanza.models.constituency.trainer import Trainer
from stanza.models.constituency.utils import retag_trees, build_optimizer, build_scheduler, \
    verify_transitions, get_open_nodes, check_constituents, check_root_labels, \
    remove_duplicate_trees, remove_singleton_trees
from stanza.server.parser_eval import EvaluateParser, ParseResult
from stanza.utils.get_tqdm import get_tqdm

tqdm = get_tqdm()

logger = logging.getLogger('stanza.constituency.trainer')

TrainItem = namedtuple("TrainItem", ['tree', 'gold_sequence', 'preterminals'])
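# Each training example travels through this module as a TrainItem: the gold parse tree,
# its gold transition sequence, and a stripped-down list of preterminals used to seed the
# parser state (built by compose_train_data below).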

class EpochStats(namedtuple("EpochStats", ['epoch_loss', 'transitions_correct', 'transitions_incorrect',
                                           'repairs_used', 'fake_transitions_used', 'nans'])):
    def __add__(self, other):
        transitions_correct = self.transitions_correct + other.transitions_correct
        transitions_incorrect = self.transitions_incorrect + other.transitions_incorrect
        repairs_used = self.repairs_used + other.repairs_used
        fake_transitions_used = self.fake_transitions_used + other.fake_transitions_used
        epoch_loss = self.epoch_loss + other.epoch_loss
        nans = self.nans + other.nans
        return EpochStats(epoch_loss, transitions_correct, transitions_incorrect,
                          repairs_used, fake_transitions_used, nans)

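# Illustration (values are made up): because the transition tallies are Counters, adding two
# EpochStats merges the per-transition counts and sums the scalar fields, e.g.
#   a = EpochStats(1.5, Counter({'Shift': 10}), Counter(), Counter(), 0, 0)
#   b = EpochStats(0.7, Counter({'Shift': 4}), Counter({'Shift': 1}), Counter(), 0, 0)
#   a + b  ->  epoch_loss 2.2, transitions_correct Counter({'Shift': 14})
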
def evaluate(args, model_file, retag_pipeline):
    """
    Loads the given model file and tests the eval_file treebank.

    May retag the trees using retag_pipeline
    Uses a subprocess to run the Java EvalB code
    """
    # body reconstructed (sketch) from the recoverable constants
    kbest = args['num_generate'] + 1 if args['num_generate'] > 0 else None

    with EvaluateParser(kbest=kbest) as evaluator:
        foundation_cache = retag_pipeline[0].foundation_cache if retag_pipeline else FoundationCache()
        load_args = {
            'wordvec_pretrain_file': args['wordvec_pretrain_file'],
            'charlm_forward_file':   args['charlm_forward_file'],
            'charlm_backward_file':  args['charlm_backward_file'],
            'device':                args['device'],
        }
        trainer = Trainer.load(model_file, args=load_args, foundation_cache=foundation_cache)
        if args['log_shapes']:
            trainer.log_shapes()

        treebank = tree_reader.read_treebank(args['eval_file'])
        logger.info("Read %d trees for evaluation", len(treebank))

        retagged_treebank = treebank
        if retag_pipeline is not None:
            retag_method = trainer.model.args['retag_method']
            retag_xpos = retag_method == 'xpos'
            logger.info("Retagging trees using the %s tags from the %s package...",
                        retag_method, args['retag_package'])
            retagged_treebank = retag_trees(treebank, retag_pipeline, retag_xpos)
            logger.info("Retagging finished")

        if args['log_norms']:
            trainer.model.log_norms()

        f1, kbestF1, _ = run_dev_set(trainer.model, retagged_treebank, treebank, args, evaluator)
        logger.info("F1 score on %s: %f", args['eval_file'], f1)
        if kbestF1 is not None:
            logger.info("KBest F1 score on %s: %f", args['eval_file'], kbestF1)

def remove_optimizer(args, model_save_file, model_load_file):
    """
    A utility method to remove the optimizer from a save file

    Will make the save file a lot smaller
    """
    load_args = {
        'wordvec_pretrain_file': args['wordvec_pretrain_file'],
        'charlm_forward_file':   args['charlm_forward_file'],
        'charlm_backward_file':  args['charlm_backward_file'],
        'device':                args['device'],
    }
    trainer = Trainer.load(model_load_file, args=load_args, load_optimizer=False)
    trainer.save(model_save_file)

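# Hypothetical usage (the file names are illustrative only): shrink a saved parser by
# dropping its optimizer state before distributing it.
#   remove_optimizer(args, "parser_nooptimizer.pt", "parser_checkpoint.pt")
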
def add_grad_clipping(trainer, grad_clipping):
    """
    Adds a torch.clamp hook on each parameter if grad_clipping is not None
    """
    if grad_clipping is not None:
        for p in trainer.model.parameters():
            if p.requires_grad:
                p.register_hook(lambda grad: torch.clamp(grad, -grad_clipping, grad_clipping))

def build_trainer(args, train_trees, dev_trees, silver_trees, foundation_cache, model_load_file):
    """
    Builds a Trainer (with model) and the train_sequences and transitions for the given trees.
    """
    # Recoverable outline (the exact control flow is not preserved in the compiled dump):
    #  - collect the unique constituent labels and tags of the training set
    #    ("Unique constituents in training set: %s", "Constituent node counts: %s",
    #     "Unique tags in training set: %s") and, when args['check_valid_states'] is set,
    #    run check_constituents on the dev and silver treebanks
    #  - fail fast if the tagger left None tags ("Fatal problem: the tagger put None on some
    #    of the nodes!") and warn about dev tags unseen in training ("Found tag in dev set
    #    which does not exist in train set: %s  Continuing...")
    #  - compute the unary limit from the deepest unary chains ("Unary limit: %d")
    #  - convert the train / dev / silver treebanks into transition sequences for
    #    args['transition_scheme'] (optionally reversed) and verify them with
    #    verify_transitions ("Total unique transitions in train set: %d")
    #  - check the root labels, collect the open nodes ("Using the following open nodes"),
    #    and build the word vocabulary: rare words below args['rare_word_threshold'], plus
    #    silver words when args['use_silver_words'] is set
    #    ("Getting silver words to add to the delta embedding")
    #  - construct the trainer and its model, log the number of words it knows, and attach
    #    gradient clipping via add_grad_clipping(trainer, args['grad_clipping'])
    #  - returns (trainer, train_sequences, silver_sequences, train_transitions)
    ...

def train(args, model_load_file, retag_pipeline):
    """
    Build a model, train it using the requested train & dev files
    """
    # Recoverable outline (the exact control flow is not preserved in the compiled dump):
    #  - log the training args; if args['wandb'] is set, start a wandb run named
    #    args['wandb_name'] or "%s_constituency" % args['shorthand'] with a max summary on
    #    the dev_score metric
    #  - make sure args['save_dir'] exists, then read the training treebank
    #    ("Read %d trees for the training set"), optionally dropping duplicate and singleton
    #    trees, the dev treebank ("Read %d trees for the dev set") and, if configured, the
    #    silver treebank ("Read %d trees for the silver training set")
    #  - retag all treebanks when a retag_pipeline is given ("Retagging trees using the %s
    #    tags from the %s package...", "Retagging finished")
    #  - build the trainer and data with build_trainer(...), run the training loop with
    #    iterate_training(...), and finish the wandb run if one was started
    #  - returns the resulting Trainer
    ...


def compose_train_data(trees, sequences):
    preterminal_lists = [[Tree(label=preterminal.label, children=Tree(label=preterminal.children[0].label))
                          for preterminal in tree.yield_preterminals()]
                         for tree in trees]
    data = [TrainItem(*x) for x in zip(trees, sequences, preterminal_lists)]
    return data

def next_epoch_data(leftover_training_data, train_data, epoch_size):
    """
    Return the next epoch_size trees from the training data, starting
    with leftover data from the previous epoch if there is any

    The training loop generally operates on a fixed number of trees,
    rather than going through all the trees in the training set
    exactly once, and keeping the leftover training data via this
    function ensures that each tree in the training set is touched
    once before beginning to iterate again.
    """
    if not train_data:
        return [], []

    # start from the leftovers of the previous epoch, then reshuffle and append the full
    # training set until there is enough data for one epoch
    epoch_data = leftover_training_data
    while len(epoch_data) < epoch_size:
        random.shuffle(train_data)
        epoch_data.extend(train_data)
    leftover_training_data = epoch_data[epoch_size:]
    epoch_data = epoch_data[:epoch_size]

    return leftover_training_data, epoch_data

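# Illustration (hypothetical sizes): with 10 training items and an epoch size of 4, the
# rotation guarantees every item is used once before any item is repeated.
#   leftover, items = [], list(range(10))
#   leftover, epoch1 = next_epoch_data(leftover, items, 4)   # 4 items drawn, 6 left over
#   leftover, epoch2 = next_epoch_data(leftover, items, 4)   # starts from those 6 leftovers
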
def update_bert_learning_rate(args, optimizer, epochs_trained):
    """
    Update the learning rate for the bert finetuning, if applicable
    """
    # body reconstructed (sketch) from the recoverable constants
    for base_param_group in optimizer.param_groups:
        if base_param_group['param_group_name'] == 'base':
            break
    else:
        raise AssertionError("There should always be a base parameter group")

    for param_group in optimizer.param_groups:
        if param_group['param_group_name'] == 'bert':
            old_lr = param_group['lr']
            if args['bert_finetune_begin_epoch'] is not None and epochs_trained < args['bert_finetune_begin_epoch']:
                param_group['lr'] = 0.0
            elif args['bert_finetune_end_epoch'] is not None and epochs_trained >= args['bert_finetune_end_epoch']:
                param_group['lr'] = 0.0
            elif args['multistage'] and epochs_trained < args['epochs'] // 2:
                param_group['lr'] = base_param_group['lr'] * args['stage1_bert_learning_rate']
            else:
                param_group['lr'] = base_param_group['lr'] * args['bert_learning_rate']
            if param_group['lr'] != old_lr:
                logger.info("Setting %s finetuning rate from %f to %f",
                            param_group['param_group_name'], old_lr, param_group['lr'])

def iterate_training(args, trainer, train_trees, train_sequences, transitions,
                     dev_trees, silver_trees, silver_sequences, foundation_cache, model_save_file):
    """
    Given an initialized model, a processed dataset, and a secondary dev dataset, train the model

    The training is iterated in the following loop:
      extract a batch of trees of the same length from the training set
      convert those trees into initial parsing states
      repeat until trees are done:
        batch predict the model's interpretation of the current states
        add the errors to the list of things to backprop
        advance the parsing state for each of the trees
    """
    # Recoverable outline (the exact control flow is not preserved in the compiled dump):
    #  - build the loss: CrossEntropyLoss(reduction='sum') for --loss=cross, FocalLoss with
    #    gamma=args['loss_focal_gamma'] for --loss=focal (requires focal_loss_torch), or
    #    LargeMarginInSoftmaxLoss for --loss=large_margin
    #  - move each transition to the model's device as a tensor, compose the train and
    #    silver data with compose_train_data, and build the dynamic oracle matching
    #    args['transition_scheme'] (InOrderOracle, InOrderCompoundOracle, or TopDownOracle)
    #  - for each epoch up to args['epochs']:
    #      * update the bert finetuning rate, draw the next epoch of train (and silver) data
    #        with next_epoch_data, sort it, and run train_model_one_epoch
    #      * score the dev set with run_dev_set; save the model whenever the dev score
    #        improves ("New best dev score: %.5f > %.5f") and keep a checkpoint
    #      * log the per-epoch statistics (transitions correct / incorrect, total loss, dev
    #        and best dev scores, nan batches), optionally to wandb, and optionally log
    #        parameter norms matching args['wandb_norm_regex']
    #      * handle args['early_dropout'] (set all dropouts to 0.0 at that epoch) and
    #        args['multistage'] restarts, which rebuild the model with more pattn / lattn
    #        layers and a fresh optimizer ("Finished stage at epoch %d.  Restarting optimizer")
    #  - returns the trainer holding the best model seen
    ...


def train_model_one_epoch(epoch, trainer, transition_tensors, process_outputs, model_loss_function,
                          epoch_data, oracle, args):
    # sketch of the recoverable structure: iterate the epoch's data in shuffled batches of
    # args['train_batch_size'], accumulating an EpochStats across batches
    interval_starts = list(range(0, len(epoch_data), args['train_batch_size']))
    random.shuffle(interval_starts)

    optimizer = trainer.optimizer
    epoch_stats = EpochStats(0.0, Counter(), Counter(), Counter(), 0, 0)

    for batch_idx, interval_start in enumerate(tqdm(interval_starts, postfix="Epoch %d" % epoch)):
        batch = epoch_data[interval_start:interval_start + args['train_batch_size']]
        batch_stats = train_model_one_batch(epoch, batch_idx, trainer.model, batch, transition_tensors,
                                            process_outputs, model_loss_function, oracle, args)
        trainer.batches_trained += 1

        # skip the optimizer step if the batch loss went to nan
        if batch_stats.nans == 0:
            optimizer.step()
        optimizer.zero_grad()
        epoch_stats = epoch_stats + batch_stats

    total_correct = sum(v for _, v in epoch_stats.transitions_correct.items())
    total_incorrect = sum(v for _, v in epoch_stats.transitions_incorrect.items())
    logger.info("Transitions correct: %d\n  %s", total_correct, str(epoch_stats.transitions_correct))
    logger.info("Transitions incorrect: %d\n  %s", total_incorrect, str(epoch_stats.transitions_incorrect))
    if len(epoch_stats.repairs_used) > 0:
        logger.info("Oracle repairs:\n  %s",
                    "\n  ".join("%s (%s): %d" % (repair.name, repair.value, count)
                                for repair, count in epoch_stats.repairs_used.most_common()))
    if epoch_stats.fake_transitions_used > 0:
        logger.info("Fake transitions used: %d", epoch_stats.fake_transitions_used)
    return epoch_stats

def train_model_one_batch(epoch, batch_idx, model, training_batch, transition_tensors,
                          process_outputs, model_loss_function, oracle, args):
    """
    Train the model for one batch

    The model itself will be updated, and a bunch of stats are returned
    It is unclear if this refactoring is useful in any way.  Might not be

    ... although the indentation does get pretty ridiculous if this is
    merged into train_model_one_epoch and then iterate_training
    """
    # Recoverable outline (the exact control flow is not preserved in the compiled dump):
    #  - build the initial parser states from each item's preterminals
    #    (model.initial_state_from_preterminals) and keep the gold transition sequences
    #  - until every tree in the batch is finished:
    #      * model.predict the current states (is_legal=False) and compare each prediction
    #        to the gold transition, tallying transitions_correct / transitions_incorrect by
    #        transition short_name
    #      * when the prediction is wrong and the dynamic oracle applies (after
    #        args['oracle_initial_epoch'], subject to args['oracle_frequency'] and
    #        args['oracle_forced_errors']), let oracle.fix_error patch the remaining gold
    #        sequence, counting repairs_used and fake_transitions_used
    #      * collect (model output, gold answer) pairs for the loss and advance all states
    #        with bulk_apply
    #  - concatenate the collected outputs and answers, compute the loss with
    #    model_loss_function, and call backward(); if args['watch_regex'] is set, log the
    #    norms and gradients of matching parameters ("Watching %s   ... epoch %d batch %d")
    #  - if the loss is NaN, report a nan count of 1 instead of adding to the epoch loss
    #  - returns an EpochStats for this batch
    ...

def run_dev_set(model, retagged_trees, original_trees, args, evaluator=None):
    """
    This reparses a treebank and executes the CoreNLP Java EvalB code.

    It only works if CoreNLP 4.3.0 or higher is in the classpath.
    """
    # Recoverable outline (the exact control flow is not preserved in the compiled dump):
    #  - "Processing %d trees from %s": put the model in eval mode, sort the retagged trees
    #    by length (sort_with_indices), and parse them with model.parse_sentences_no_grad in
    #    batches of args['eval_batch_size']
    #  - if args['num_generate'] > 0, also sample that many extra analyses per tree
    #    ("Generating %d random analyses") and report them as k-best candidates
    #  - in predict mode with args['predict_file'] set, write the predicted and original
    #    trees under args['predict_dir'], refusing to overwrite existing files ("Cowardly
    #    refusing to overwrite {}") and honoring args['predict_output_gold_tags'] and
    #    args['predict_format']
    #  - hand the ParseResult list to the EvaluateParser Java process (creating one if no
    #    evaluator was passed in) and return the F1, k-best F1, and tree-level F1 it reports
    ...