"""
Entry point for training and evaluating a character-level neural language model.
"""

import argparse
from copy import copy
import logging
import lzma
import math
import os
import time
from types import GeneratorType

import numpy as np
import torch

from stanza.models.common.char_model import build_charlm_vocab, CharacterLanguageModel, CharacterLanguageModelTrainer
from stanza.models.common.vocab import CharVocab
from stanza.models.common import utils
from stanza.models import _training_logging

logger = logging.getLogger('stanza')

def repackage_hidden(h):
    """Wraps hidden states in new Tensors,
    to detach them from their history."""
    if isinstance(h, torch.Tensor):
        return h.detach()
    else:
        return tuple(repackage_hidden(v) for v in h)

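# A minimal sketch (not part of the original module) of why hidden states are
# detached: the text is processed window by window, and the recurrent state is
# carried across windows for continuity, but gradients must stop at the window
# boundary rather than unroll over the whole history (truncated BPTT).
# `lstm`, `criterion` and `windows` below are hypothetical:
#
#     hidden = None
#     for window, target in windows:
#         if hidden is not None:
#             hidden = repackage_hidden(hidden)  # detach -> truncate backprop
#         output, hidden = lstm(window, hidden)
#         loss = criterion(output, target)
#         loss.backward()  # only reaches back to the last detach
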
def batchify(data, bsz, device):
    """Chop a 1D stream of character ids into bsz rows of equal length."""
    nbatch = data.size(0) // bsz
    # trim off any leftover ids that do not fit evenly into bsz rows
    data = data.narrow(0, 0, nbatch * bsz)
    data = data.view(bsz, -1)
    data = data.to(device)
    return data

def get_batch(source, i, seq_len):
    """Slice out a window of at most seq_len characters starting at position i;
    the target is the same window shifted one character to the right."""
    seq_len = min(seq_len, source.size(1) - 1 - i)
    data = source[:, i:i+seq_len]
    target = source[:, i+1:i+1+seq_len].reshape(-1)
    return data, target

def load_file(filename, vocab, direction):
    with utils.open_read_text(filename) as fin:
        data = fin.read()
    idx = vocab['char'].map(data)
    if direction == 'backward':
        idx = idx[::-1]
    return torch.tensor(idx)

def load_data(path, vocab, direction):
    """Yield one tensor of character ids per file; path may be a single file
    or a directory of training files."""
    if os.path.isdir(path):
        filenames = sorted(os.listdir(path))
        for filename in filenames:
            logger.info("Loading data from {}".format(filename))
            data = load_file(os.path.join(path, filename), vocab, direction)
            yield data
    else:
        data = load_file(path, vocab, direction)
        yield data

r@   c                  C   s  t jt jd} | jdtdd | jdtdd | jdtdd | jd	td
d | jddddgd | jddddgdd | jdddddd | jdddddd | jdtddd | jdtd d!d | jd"td#d$d | jd%td&d'd | jd(td)d*d | jd+td,d-d | jd.tdd/d | jd0td1d2d | jd3td4d5d | jd6td7d8d | jd9td:d;d | jd<td7d=d | jd>td#d?d | jd@td,dAd | jdBtd,dCd | jdDtdEdFd | jdGtd4dHd | jdItdJdKd | jdLtd dMd | jdNtd dOd | jdPtd dQd | jdRdSdTdUdV | jdWtdXdYd | jdZd[d\d] t|  | jd^td_d` | jdad[dbd] | jdcd ddde | S )fN)formatter_classz--train_filezInput plaintext file)typehelpz--train_dirz>If non-empty, load from directory with multiple training filesz--eval_filez)Input plaintext file for the dev/test setz--shorthandzUD treebank shorthandz--modetrainpredict)defaultchoicesz--directionforwardr+   z"Forward or backward language model)rF   rG   rC   z	--forwardstore_constr2   zTrain a forward language model)actiondestconstrC   z
--backwardzTrain a backward language modelz--char_emb_dimd   zDimension of unit embeddings)rB   rF   rC   z--char_hidden_dimi   zDimension of hidden unitsz--char_num_layersr"   z#Layers of RNN in the language modelz--char_dropoutg?zDropout probabilityz--char_unit_dropoutgh㈵>z1Randomly set an input char to UNK during trainingz--char_rec_dropout        zRecurrent dropout probabilityz--batch_sizezBatch size to usez--bptt_size   z%Sequence length to consider at a timez--epochs2   z#Total epochs to train the model forz--max_grad_normg      ?z Maximum gradient norm to clip toz--lr0   zInitial learning ratez--annealzHAnneal the learning rate by this amount when dev performance deterioratez
--patiencez(Patience for annealing the learning ratez--weight_decayzWeight decayz
--momentumzMomentum for SGD.z--cutoffi  zJFrequency cutoff for char vocab. By default we assume a very large corpus.z--report_stepsz#Update step interval to report lossz--eval_stepsi zKUpdate step interval to run eval on dev; set to -1 to eval after each epochz--save_namezFile name to save the modelz--vocab_save_namezFile name to save the vocabz--checkpoint_save_namez,File name to save the most recent checkpointz--no_checkpoint
checkpointstore_falsezDon't save checkpoints)rK   rJ   rC   z
--save_dirzsaved_models/charlmzDirectory to save models inz	--summary
store_truez&Use summary writer to record progress.)rJ   rC   z--seedi  )rB   rF   z--wandbzStart a wandb session and write the results of training.  Only applies to training.  Use --wandb_name instead to specify a namez--wandb_namezWName of a wandb session to start when training.  Will default to the dataset short name)rF   rC   )	argparseArgumentParserArgumentDefaultsHelpFormatteradd_argumentstrintfloatr   add_device_args)parserr   r   r   build_argparseC   sL   
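# Example invocation (paths and shorthand are made up for illustration; the
# flags themselves come from build_argparse above):
#
#     python -m stanza.models.charlm \
#         --train_file data/en_text.txt \
#         --eval_file data/en_dev.txt \
#         --shorthand en_example \
#         --direction forward \
#         --mode train
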
r^   c                 C   s<   | d r	| d }n
d | d | d }tj| d |}|S )N	save_namez{}_{}_charlm.pt	shorthandr2   save_dir)r=   r6   r7   r>   )argsr_   
model_filer   r   r   build_model_filenamep   s
   
rd   c                 C   s*   t  }|j| d} | jrd| _t| } | S )Nrb   T)r^   
parse_args
wandb_namewandbvars)rb   r]   r   r   r   rf   x   s   rf   c                 C   sd   t | d} t| d  td| d | d  t| d  | d dkr,t|  d S t|  d S )Nre   seedz4Running {} character-level language model in {} moder2   modera   rD   )	rf   r   set_random_seedr;   r<   r=   
ensure_dirrD   evaluatere   r   r   r   main   s   
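# A worked example of the naming convention implemented by build_model_filename
# above (hypothetical values): with save_name unset, save_dir
# 'saved_models/charlm', shorthand 'en_example' and direction 'forward', the
# model is written to 'saved_models/charlm/en_example_forward_charlm.pt'.
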
def evaluate_epoch(args, vocab, data, model, criterion):
    """
    Run an evaluation over entire dataset.
    """
    model.eval()
    device = next(model.parameters()).device
    hidden = None
    total_loss = 0
    if isinstance(data, GeneratorType):
        data = list(data)
        assert len(data) == 1, 'Only support single dev/test file'
        data = data[0]
    batches = batchify(data, args['batch_size'], device)
    with torch.no_grad():
        for i in range(0, batches.size(1) - 1, args['bptt_size']):
            data, target = get_batch(batches, i, args['bptt_size'])
            lens = [data.size(1) for _ in range(data.size(0))]
            output, hidden, decoded = model.forward(data, lens, hidden)
            loss = criterion(decoded.view(-1, len(vocab['char'])), target)
            hidden = repackage_hidden(hidden)
            total_loss += data.size(1) * loss.data.item()
    return total_loss / batches.size(1)

def evaluate_and_save(args, vocab, data, trainer, best_loss, model_file, checkpoint_file, writer=None):
    """
    Run an evaluation over entire dataset, print progress and save the model if necessary.
    """
    start_time = time.time()
    loss = evaluate_epoch(args, vocab, data, trainer.model, trainer.criterion)
    ppl = math.exp(loss)
    elapsed = int(time.time() - start_time)
    previous_lr = get_current_lr(trainer, args)
    trainer.scheduler.step(loss)
    current_lr = get_current_lr(trainer, args)
    if current_lr != previous_lr:
        logger.info("Updating learning rate to %f", current_lr)
    logger.info(
        "| eval checkpoint @ global step {:10d} | time elapsed {:6d}s | loss {:5.2f} | ppl {:8.2f}".format(
            trainer.global_step,
            elapsed,
            loss,
            ppl,
        )
    )
    if best_loss is None or loss < best_loss:
        best_loss = loss
        trainer.save(model_file, full=False)
        logger.info('new best model saved at step {:10d}'.format(trainer.global_step))
    if writer:
        writer.add_scalar('dev_loss', loss, global_step=trainer.global_step)
        writer.add_scalar('dev_ppl', ppl, global_step=trainer.global_step)
    if checkpoint_file:
        trainer.save(checkpoint_file, full=True)
        logger.info('new checkpoint saved at step {:10d}'.format(trainer.global_step))
    return loss, ppl, best_loss

def get_current_lr(trainer, args):
    # the plateau scheduler only records _last_lr after its first step, so
    # fall back to the configured initial learning rate before then
    return trainer.scheduler.state_dict().get('_last_lr', [args['lr0']])[0]

def load_char_vocab(vocab_file):
    return {'char': CharVocab.load_state_dict(torch.load(vocab_file, lambda storage, loc: storage, weights_only=True))}

def train(args):
    utils.log_training_args(args, logger)

    model_file = build_model_filename(args)
    if args['vocab_save_name'] is not None:
        vocab_file = args['save_dir'] + '/' + args['vocab_save_name']
    else:
        vocab_file = '{}/{}_vocab.pt'.format(args['save_dir'], args['shorthand'])
    if args['checkpoint']:
        checkpoint_file = utils.checkpoint_name(args['save_dir'], model_file, args['checkpoint_save_name'])
    else:
        checkpoint_file = None

    if os.path.exists(vocab_file):
        logger.info('Loading existing vocab file')
        vocab = load_char_vocab(vocab_file)
    else:
        logger.info('Building and saving vocab')
        vocab = {'char': build_charlm_vocab(args['train_dir'] if args['train_dir'] is not None else args['train_file'], cutoff=args['cutoff'])}
        torch.save(vocab['char'].state_dict(), vocab_file)
    logger.info("Training model with vocab size: {}".format(len(vocab['char'])))

    if checkpoint_file and os.path.exists(checkpoint_file):
        logger.info("Loading existing checkpoint: %s" % checkpoint_file)
        trainer = CharacterLanguageModelTrainer.load(args, checkpoint_file, finetune=True)
    else:
        trainer = CharacterLanguageModelTrainer.from_new_model(args, vocab)

    writer = None
    if args['summary']:
        from torch.utils.tensorboard import SummaryWriter
        if args['save_name'] is not None:
            summary_dir = '{}/{}_summary'.format(args['save_dir'], args['save_name'])
        else:
            summary_dir = '{}/{}_{}_charlm_summary'.format(args['save_dir'], args['shorthand'], args['direction'])
        writer = SummaryWriter(log_dir=summary_dir)

    # evaluate on dev every eval_steps; a non-positive value means evaluating
    # only at the end of each epoch instead
    eval_within_epoch = False
    if args['eval_steps'] > 0:
        eval_within_epoch = True

    if args['wandb']:
        import wandb
        wandb_name = args['wandb_name'] if args['wandb_name'] else '%s_%s_charlm' % (args['shorthand'], args['direction'])
        wandb.init(name=wandb_name, config=args)
        wandb.run.define_metric('train_loss', summary='min')
        wandb.run.define_metric('dev_loss', summary='min')

    device = next(trainer.model.parameters()).device
    best_loss = None
    start_epoch = trainer.epoch
    for epoch in range(start_epoch, args['epochs'] + 1):
        trainer.epoch = epoch
        if args['train_dir'] is not None:
            train_path = args['train_dir']
        else:
            train_path = args['train_file']
        train_data = load_data(train_path, vocab, args['direction'])
        dev_data = load_file(args['eval_file'], vocab, args['direction'])

        for data_chunk in train_data:
            batches = batchify(data_chunk, args['batch_size'], device)
            hidden = None
            total_loss = 0.0
            total_batches = math.ceil((batches.size(1) - 1) / args['bptt_size'])
            iteration, i = 0, 0
            while i < batches.size(1) - 1 - 1:
                trainer.model.train()
                trainer.global_step += 1
                start_time = time.time()
                # vary the window length a little so the model does not always
                # see sequence boundaries at the same positions
                bptt = args['bptt_size'] if np.random.random() < 0.95 else args['bptt_size'] / 2.
                seq_len = max(5, int(np.random.normal(bptt, 5)))
                seq_len = min(seq_len, int(args['bptt_size'] * 1.2))
                data, target = get_batch(batches, i, seq_len)
                lens = [data.size(1) for _ in range(data.size(0))]

                if hidden is not None:
                    hidden = repackage_hidden(hidden)
                trainer.optimizer.zero_grad()
                output, hidden, decoded = trainer.model.forward(data, lens, hidden)
                loss = trainer.criterion(decoded.view(-1, len(vocab['char'])), target)
                total_loss += loss.data.item()
                loss.backward()

                torch.nn.utils.clip_grad_norm_(trainer.params, args['max_grad_norm'])
                trainer.optimizer.step()
                hidden = repackage_hidden(hidden)

                if (iteration + 1) % args['report_steps'] == 0:
                    cur_loss = total_loss / args['report_steps']
                    elapsed = time.time() - start_time
                    logger.info(
                        "| epoch {:5d} | {:5d}/{:5d} batches | sec/batch {:.6f} | loss {:5.2f} | ppl {:8.2f}".format(
                            trainer.epoch,
                            iteration + 1,
                            total_batches,
                            elapsed / args['report_steps'],
                            cur_loss,
                            math.exp(cur_loss),
                        )
                    )
                    if args['wandb']:
                        wandb.log({'train_loss': cur_loss}, step=trainer.global_step)
                    total_loss = 0.0

                iteration += 1
                i += seq_len

                if eval_within_epoch and trainer.global_step % args['eval_steps'] == 0:
                    loss, ppl, best_loss = evaluate_and_save(args, vocab, dev_data, trainer, best_loss, model_file, checkpoint_file, writer)
                    if args['wandb']:
                        wandb.log({'dev_loss': loss, 'dev_ppl': ppl, 'lr': get_current_lr(trainer, args)}, step=trainer.global_step)

        # if we are not evaluating within the epoch, evaluate at the end of
        # every epoch; always evaluate after the final epoch
        if not eval_within_epoch or trainer.epoch == args['epochs']:
            loss, ppl, best_loss = evaluate_and_save(args, vocab, dev_data, trainer, best_loss, model_file, checkpoint_file, writer)
            if args['wandb']:
                wandb.log({'dev_loss': loss, 'dev_ppl': ppl, 'lr': get_current_lr(trainer, args)}, step=trainer.global_step)

    if writer:
        writer.close()
    if args['wandb']:
        wandb.finish()

def evaluate(args):
    model_file = build_model_filename(args)
    model = CharacterLanguageModel.load(model_file)
    vocab = model.vocab
    data = load_data(args['eval_file'], vocab, args['direction'])
    criterion = torch.nn.CrossEntropyLoss()
    loss = evaluate_epoch(args, vocab, data, model, criterion)
    logger.info("| best model | loss {:5.2f} | ppl {:8.2f}".format(loss, math.exp(loss)))

if __name__ == '__main__':
    main()
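# A hedged usage sketch (file names are made up): main() forwards its argument
# list to argparse, so training can also be driven programmatically:
#
#     from stanza.models import charlm
#     charlm.main(['--train_file', 'data/tiny.txt',
#                  '--eval_file', 'data/tiny_dev.txt',
#                  '--shorthand', 'en_tiny',
#                  '--epochs', '2'])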

-


# 
