"""
Entry point for training and evaluating a dependency parser.

This implementation combines a deep biaffine graph-based parser with linearization and distance features.
For details please refer to the paper: https://nlp.stanford.edu/pubs/qi2018universal.pdf.
"""

import argparse
import copy
import logging
import os
import shutil
import sys
import time

import numpy as np
import random
import torch
from torch import nn, optim

import stanza.models.depparse.data as data
from stanza.models.depparse.data import DataLoader
from stanza.models.depparse.trainer import Trainer
from stanza.models.depparse import scorer
from stanza.models.common import utils
from stanza.models.common import pretrain
from stanza.models.common.data import augment_punct
from stanza.models.common.doc import *
from stanza.models.common.peft_config import add_peft_args, resolve_peft_args
from stanza.utils.conll import CoNLL
from stanza.models import _training_logging
logger = logging.getLogger('stanza')


def build_argparse():
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', type=str, default='data/depparse', help='Root dir for saving models.')
    parser.add_argument('--wordvec_dir', type=str, default='extern_data/word2vec', help='Directory of word vectors.')
    parser.add_argument('--wordvec_file', type=str, default=None, help='Word vectors filename.')
    parser.add_argument('--wordvec_pretrain_file', type=str, default=None, help='Exact name of the pretrain file to read')
    parser.add_argument('--train_file', type=str, default=None, help='Input file for data loader.')
    parser.add_argument('--eval_file', type=str, default=None, help='Input file for data loader.')
    parser.add_argument('--output_file', type=str, default=None, help='Output CoNLL-U file.')
    parser.add_argument('--no_gold_labels', dest='gold_labels', action='store_false', help="Don't score the eval file - perhaps it has no gold labels, for example.  Cannot be used at training time")
    parser.add_argument('--mode', default='train', choices=['train', 'predict'])
    parser.add_argument('--lang', type=str, help='Language')
    parser.add_argument('--shorthand', type=str, help='Treebank shorthand')

    parser.add_argument('--hidden_dim', type=int, default=400)
    parser.add_argument('--char_hidden_dim', type=int, default=400)
    parser.add_argument('--deep_biaff_hidden_dim', type=int, default=400)
    parser.add_argument('--composite_deep_biaff_hidden_dim', type=int, default=100)
    parser.add_argument('--word_emb_dim', type=int, default=75)
    parser.add_argument('--char_emb_dim', type=int, default=100)
    parser.add_argument('--tag_emb_dim', type=int, default=50)
    parser.add_argument('--no_upos', dest='use_upos', action='store_false', default=True, help="Don't use upos tags as part of the tag embedding")
    parser.add_argument('--no_xpos', dest='use_xpos', action='store_false', default=True, help="Don't use xpos tags as part of the tag embedding")
    parser.add_argument('--no_ufeats', dest='use_ufeats', action='store_false', default=True, help="Don't use ufeats as part of the tag embedding")
    parser.add_argument('--transformed_dim', type=int, default=125)
    parser.add_argument('--num_layers', type=int, default=3)
    parser.add_argument('--char_num_layers', type=int, default=1)
    parser.add_argument('--checkpoint_save_name', type=str, default=None, help='File name to save the most recent checkpoint')
    parser.add_argument('--no_checkpoint', dest='checkpoint', action='store_false', help="Don't save checkpoints")
    parser.add_argument('--pretrain_max_vocab', type=int, default=250000)
    parser.add_argument('--word_dropout', type=float, default=0.33)
    parser.add_argument('--dropout', type=float, default=0.5)
    parser.add_argument('--rec_dropout', type=float, default=0, help='Recurrent dropout')
    parser.add_argument('--char_rec_dropout', type=float, default=0, help='Recurrent dropout')
    parser.add_argument('--no_char', dest='char', action='store_false', help='Turn off character model.')
    parser.add_argument('--charlm', action='store_true', help='Turn on contextualized char embedding using pretrained character-level language model.')
    parser.add_argument('--charlm_save_dir', type=str, default='saved_models/charlm', help='Root dir for pretrained character-level language model.')
    parser.add_argument('--charlm_shorthand', type=str, default=None, help='Shorthand for character-level language model training corpus.')
    parser.add_argument('--charlm_forward_file', type=str, default=None, help='Exact path to use for forward charlm')
    parser.add_argument('--charlm_backward_file', type=str, default=None, help='Exact path to use for backward charlm')
    parser.add_argument('--bert_model', type=str, default=None, help='Use an external bert model (requires the transformers package)')
    parser.add_argument('--no_bert_model', dest='bert_model', action='store_const', const=None, help="Don't use bert")
    parser.add_argument('--bert_hidden_layers', type=int, default=4, help='How many layers of hidden state to use from the transformer')
    parser.add_argument('--bert_hidden_layers_original', dest='bert_hidden_layers', action='store_const', const=None, help='Use layers 2,3,4 of the Bert embedding')
    parser.add_argument('--bert_finetune', action='store_true', default=False, help='Finetune the bert (or other transformer)')
    parser.add_argument('--no_bert_finetune', dest='bert_finetune', action='store_false', help="Don't finetune the bert (or other transformer)")
    parser.add_argument('--bert_finetune_layers', type=int, default=None, help='Only finetune this many layers from the transformer')
    parser.add_argument('--bert_learning_rate', type=float, default=1.0, help='Scale the learning rate for transformer finetuning by this much')
    parser.add_argument('--second_bert_learning_rate', type=float, default=1e-3, help='Secondary stage transformer finetuning learning rate scale')
    parser.add_argument('--bert_start_finetuning', type=int, default=200, help='When to start finetuning the transformer')
    parser.add_argument('--bert_warmup_steps', type=int, default=200, help='How many steps for a linear warmup when finetuning the transformer')
    parser.add_argument('--bert_weight_decay', type=float, default=0.0, help='Weight decay bert parameters by this much')
    parser.add_argument('--no_pretrain', dest='pretrain', action='store_false', help='Turn off pretrained embeddings.')
    parser.add_argument('--no_linearization', dest='linearization', action='store_false', help='Turn off linearization term.')
    parser.add_argument('--no_distance', dest='distance', action='store_false', help='Turn off distance term.')

    parser.add_argument('--sample_train', type=float, default=1.0, help='Subsample training data.')
    parser.add_argument('--optim', type=str, default='adam', help='sgd, adagrad, adam or adamax.')
    parser.add_argument('--second_optim', type=str, default=None, help='sgd, adagrad, adam or adamax.')
    parser.add_argument('--lr', type=float, default=3e-3, help='Learning rate')
    parser.add_argument('--second_lr', type=float, default=3e-4, help='Secondary stage learning rate')
    parser.add_argument('--weight_decay', type=float, default=None, help='Weight decay for the first optimizer')
    parser.add_argument('--beta2', type=float, default=0.95)
    parser.add_argument('--second_optim_start_step', type=int, default=None, help='If set, switch to the second optimizer when stalled or at this step regardless of performance.  Normally, the optimizer only switches when the dev scores have stalled for --max_steps_before_stop steps')
    parser.add_argument('--second_warmup_steps', type=int, default=None, help="If set, give the 2nd optimizer a linear warmup.  Idea being that the optimizer won't have a good grasp on the initial gradients and square gradients when it first starts")

    parser.add_argument('--max_steps', type=int, default=50000)
    parser.add_argument('--eval_interval', type=int, default=100)
    parser.add_argument('--checkpoint_interval', type=int, default=500)
    parser.add_argument('--max_steps_before_stop', type=int, default=1000)
    parser.add_argument('--batch_size', type=int, default=5000)
    parser.add_argument('--second_batch_size', type=int, default=None, help='Use a different batch size for the second optimizer.  Can be relevant for models with different transformer finetuning settings between optimizers, for example, where the larger batch size is impossible for FT the transformer')
    parser.add_argument('--max_grad_norm', type=float, default=1.0, help='Gradient clipping.')

    parser.add_argument('--log_step', type=int, default=20, help='Print log every k steps.')
    parser.add_argument('--log_norms', action='store_true', default=False, help='Log the norms of all the parameters (noisy!)')
    parser.add_argument('--save_dir', type=str, default='saved_models/depparse', help='Root dir for saving models.')
    parser.add_argument('--save_name', type=str, default='{shorthand}_{embedding}_parser.pt', help='File name to save the model')
    parser.add_argument('--continue_from', type=str, default=None, help='File name to preload the model to continue training from')
    parser.add_argument('--seed', type=int, default=1234)

    add_peft_args(parser)
    utils.add_device_args(parser)

    parser.add_argument('--augment_nopunct', type=float, default=None, help='Augment the training data by copying this fraction of punct-ending sentences as non-punct.  Default of None will aim for roughly 10%%')
    parser.add_argument('--wandb', action='store_true', help='Start a wandb session and write the results of training.  Only applies to training.  Use --wandb_name instead to specify a name')
    parser.add_argument('--wandb_name', default=None, help='Name of a wandb session to start when training.  Will default to the dataset short name')

    return parser
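

# A minimal command-line sketch for this entry point.  The en_ewt shorthand
# and the file paths below are illustrative assumptions rather than files
# shipped with the package; the flags themselves all come from
# build_argparse() above.
#
#   python -m stanza.models.parser --mode train --shorthand en_ewt \
#       --train_file data/depparse/en_ewt.train.in.conllu \
#       --eval_file data/depparse/en_ewt.dev.in.conllu \
#       --output_file /tmp/en_ewt.dev.pred.conllu \
#       --wordvec_pretrain_file saved_models/depparse/en_ewt.pretrain.pt
#
# The eval file doubles as the gold standard during scoring, so a prediction
# run only swaps --mode train for --mode predict.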

def parse_args(args=None):
    parser = build_argparse()
    args = parser.parse_args(args=args)
    resolve_peft_args(args, logger)

    if args.wandb_name:
        args.wandb = True

    args = vars(args)
    return args


def main(args=None):
    args = parse_args(args=args)

    utils.set_random_seed(args['seed'])

    logger.info("Running parser in {} mode".format(args['mode']))

    if args['mode'] == 'train':
        return train(args)
    else:
        evaluate(args)


def model_file_name(args):
    return utils.standard_model_file_name(args, "parser")


def load_pretrain(args):
    pt = None
    if args['pretrain']:
        pretrain_file = pretrain.find_pretrain_file(args['wordvec_pretrain_file'], args['save_dir'], args['shorthand'], args['lang'])
        if os.path.exists(pretrain_file):
            vec_file = None
        else:
            vec_file = args['wordvec_file'] if args['wordvec_file'] else utils.get_wordvec_file(args['wordvec_dir'], args['shorthand'])
        pt = pretrain.Pretrain(pretrain_file, vec_file, args['pretrain_max_vocab'])
    return pt


def predict_dataset(trainer, dev_batch):
    dev_preds = []
    if len(dev_batch) > 0:
        for batch in dev_batch:
            preds = trainer.predict(batch)
            dev_preds += preds
    dev_preds = utils.unsort(dev_preds, dev_batch.data_orig_idx)
    return dev_preds
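

# main() forwards an argv-style list straight to argparse, so the same entry
# point can be driven programmatically.  A hedged sketch (the file names are
# hypothetical placeholders, not bundled data):
#
#   main(['--mode', 'predict',
#         '--shorthand', 'en_ewt',
#         '--eval_file', 'en_ewt.dev.in.conllu',
#         '--output_file', 'en_ewt.dev.pred.conllu',
#         '--wordvec_pretrain_file', 'en_ewt.pretrain.pt'])
#
# Passing --no_gold_labels skips scoring, which is the path to use when the
# eval file has no gold heads or deprels.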

def train(args):
    model_file = model_file_name(args)
    utils.ensure_dir(os.path.split(model_file)[0])

    # load pretrained word vectors if needed
    pt = load_pretrain(args)

    if args['charlm']:
        if args['charlm_shorthand'] is None:
            raise ValueError("CharLM Shorthand is required for loading pretrained CharLM model...")
        logger.info('Using pretrained contextualized char embedding')
        if not args['charlm_forward_file']:
            args['charlm_forward_file'] = '{}/{}_forward_charlm.pt'.format(args['charlm_save_dir'], args['charlm_shorthand'])
        if not args['charlm_backward_file']:
            args['charlm_backward_file'] = '{}/{}_backward_charlm.pt'.format(args['charlm_save_dir'], args['charlm_shorthand'])

    # load the training data, optionally augmented with copies of
    # punct-ending sentences rewritten to end without punctuation
    logger.info("Loading data with batch size {}...".format(args['batch_size']))
    train_data, _, _ = CoNLL.conll2dict(input_file=args['train_file'])
    logger.info("Original data size: {}".format(len(train_data)))
    train_data.extend(augment_punct(train_data, args['augment_nopunct'], keep_original_sentences=False))
    logger.info("Augmented data size: {}".format(len(train_data)))
    train_doc = Document(train_data)
    train_batch = DataLoader(train_doc, args['batch_size'], args, pt, evaluation=False)
    vocab = train_batch.vocab
    dev_doc = CoNLL.conll2doc(input_file=args['eval_file'])
    dev_batch = DataLoader(dev_doc, args['batch_size'], args, pt, vocab=vocab, evaluation=True, sort_during_eval=True)

    system_pred_file = args['output_file']

    # skip training if no training or dev data is available
    if len(train_batch) == 0 or len(dev_batch) == 0:
        logger.info("Skip training because no data available...")
        sys.exit(0)

    if args['wandb']:
        import wandb
        wandb_name = args['wandb_name'] if args['wandb_name'] else '%s_depparse' % args['shorthand']
        wandb.init(name=wandb_name, config=args)
        wandb.run.define_metric('train_loss', summary='min')
        wandb.run.define_metric('dev_score', summary='max')

    logger.info("Training parser...")

    checkpoint_file = None
    if args.get('checkpoint'):
        checkpoint_file = utils.checkpoint_name(args.get('save_dir'), model_file, args.get('checkpoint_save_name'))
        args['checkpoint_save_name'] = checkpoint_file
    if args.get('checkpoint') and os.path.exists(args['checkpoint_save_name']):
        trainer = Trainer(args=args, pretrain=pt, vocab=vocab, model_file=args['checkpoint_save_name'],
                          device=args['device'], ignore_model_config=True)
        if len(trainer.dev_score_history) > 0:
            logger.info("Continuing from checkpoint %s  Model was previously trained for %d steps, with a best dev score of %.4f",
                        args['checkpoint_save_name'], trainer.global_step, max(trainer.dev_score_history))
    elif args['continue_from']:
        if not os.path.exists(args['continue_from']):
            raise FileNotFoundError("--continue_from specified, but the file %s does not exist" % args['continue_from'])
        trainer = Trainer(args=args, pretrain=pt, vocab=vocab, model_file=args['continue_from'],
                          device=args['device'], ignore_model_config=True, reset_history=True)
    else:
        trainer = Trainer(args=args, vocab=vocab, pretrain=pt, device=args['device'])

    max_steps = args['max_steps']
    current_lr = args['lr']
    global_start_time = time.time()
    format_str = 'Finished STEP {}/{}, loss = {:.6f} ({:.3f} sec/batch), lr: {:.6f}'

    is_second_stage = False
    train_loss = 0
    if args['log_norms']:
        trainer.model.log_norms()
    while True:
        do_break = False
        for i, batch in enumerate(train_batch):
            start_time = time.time()
            trainer.global_step += 1
            loss = trainer.update(batch, eval=False)  # update step
            train_loss += loss
            force_checkpoint = False
            if trainer.global_step % args['log_step'] == 0:
                duration = time.time() - start_time
                logger.info(format_str.format(trainer.global_step, max_steps, loss, duration, current_lr))

            if trainer.global_step % args['eval_interval'] == 0:
                # evaluate on the dev set
                logger.info("Evaluating on dev set...")
                dev_preds = predict_dataset(trainer, dev_batch)
                dev_batch.doc.set([HEAD, DEPREL], [y for x in dev_preds for y in x])
                CoNLL.write_doc2conll(dev_batch.doc, system_pred_file)
                _, _, dev_score = scorer.score(system_pred_file, args['eval_file'])

                train_loss = train_loss / args['eval_interval']  # avg loss per batch
                logger.info("step {}: train_loss = {:.6f}, dev_score = {:.4f}".format(trainer.global_step, train_loss, dev_score))
                if args['wandb']:
                    wandb.log({'train_loss': train_loss, 'dev_score': dev_score})
                train_loss = 0

                # save the best model so far
                trainer.dev_score_history += [dev_score]
                if dev_score >= max(trainer.dev_score_history):
                    trainer.last_best_step = trainer.global_step
                    trainer.save(model_file)
                    logger.info("new best model saved.")
                for scheduler_name, scheduler in trainer.scheduler.items():
                    logger.info("scheduler %s learning rate: %s", scheduler_name, scheduler.get_last_lr())
                if args['log_norms']:
                    trainer.model.log_norms()

            if not is_second_stage and args.get('second_optim', None) is not None:
                if (trainer.global_step - trainer.last_best_step >= args['max_steps_before_stop'] or
                        (args['second_optim_start_step'] is not None and trainer.global_step >= args['second_optim_start_step'])):
                    logger.info("Switching to second optimizer: {}".format(args.get('second_optim', None)))
                    global_step = trainer.global_step
                    args['second_stage'] = True
                    # reload the best model saved so far; with second_stage set,
                    # the trainer builds the secondary optimizer
                    trainer = Trainer(args=args, vocab=trainer.vocab, pretrain=pt, model_file=model_file, device=args['device'])
                    logger.info('Reloading best model to continue from current local optimum')
                    dev_preds = predict_dataset(trainer, dev_batch)
                    dev_batch.doc.set([HEAD, DEPREL], [y for x in dev_preds for y in x])
                    CoNLL.write_doc2conll(dev_batch.doc, system_pred_file)
                    _, _, dev_score = scorer.score(system_pred_file, args['eval_file'])
                    logger.info("Reloaded model with dev score %.4f", dev_score)
                    is_second_stage = True
                    trainer.global_step = global_step
                    trainer.last_best_step = global_step
                    if args['second_batch_size'] is not None:
                        train_batch.set_batch_size(args['second_batch_size'])
                    force_checkpoint = True
            elif trainer.global_step - trainer.last_best_step >= args['max_steps_before_stop']:
                do_break = True
                break

            if (trainer.global_step % args['eval_interval'] == 0 or force_checkpoint) and checkpoint_file is not None:
                trainer.save(checkpoint_file, save_optimizer=True)
                logger.info("new model checkpoint saved.")

            if trainer.global_step >= args['max_steps']:
                do_break = True
                break

        if do_break:
            break

        train_batch.reshuffle()

    logger.info("Training ended with {} steps.".format(trainer.global_step))

    if args['wandb']:
        wandb.finish()

    if len(trainer.dev_score_history) > 0:
        best_f, best_eval = max(trainer.dev_score_history) * 100, np.argmax(trainer.dev_score_history) + 1
        logger.info("Best dev F1 = {:.2f}, at iteration = {}".format(best_f, best_eval * args['eval_interval']))
    else:
        logger.info("Dev set never evaluated.  Saving final model.")
        trainer.save(model_file)

    return trainer


def evaluate(args):
    model_file = model_file_name(args)
    pt = load_pretrain(args)

    load_args = {'charlm_forward_file': args.get('charlm_forward_file', None),
                 'charlm_backward_file': args.get('charlm_backward_file', None)}

    logger.info("Loading model from: {}".format(model_file))
    trainer = Trainer(pretrain=pt, model_file=model_file, device=args['device'], args=load_args)
    return evaluate_trainer(args, trainer, pt)


def evaluate_trainer(args, trainer, pt):
    system_pred_file = args['output_file']
    loaded_args, vocab = trainer.args, trainer.vocab

    # overwrite any paths or mode settings saved in the model with the
    # values given on the command line
    for k in args:
        if k.endswith('_dir') or k.endswith('_file') or k in ['shorthand'] or k == 'mode':
            loaded_args[k] = args[k]

    logger.info("Loading data with batch size {}...".format(args['batch_size']))
    doc = CoNLL.conll2doc(input_file=args['eval_file'])
    batch = DataLoader(doc, args['batch_size'], loaded_args, pt, vocab=vocab, evaluation=True, sort_during_eval=True)

    preds = predict_dataset(trainer, batch)

    # write the predictions to file, then score against the gold eval file
    batch.doc.set([HEAD, DEPREL], [y for x in preds for y in x])
    CoNLL.write_doc2conll(batch.doc, system_pred_file)

    if args['gold_labels']:
        gold_doc = CoNLL.conll2doc(input_file=args['eval_file'])
        for sent_idx, sentence in enumerate(gold_doc.sentences):
            for word_idx, word in enumerate(sentence.words):
                if word.deprel is None:
                    raise ValueError("Gold document {} has a None at sentence {} word {}\n{:C}".format(args['eval_file'], sent_idx, word_idx, sentence))
        scorer.score_named_dependencies(batch.doc, gold_doc)
        _, _, score = scorer.score(system_pred_file, args['eval_file'])

        logger.info("Parser score:")
        logger.info("{} {:.2f}".format(args['shorthand'], score * 100))


if __name__ == '__main__':
    main()