o
    –h·A  ã                   @   s¢   d Z ddlZddlZddlmZ ddlm  mZ ddlZddl	m
  m  mZ ddlmZ ddlmZ ddlmZ ddl	mZ e d¡ZG d	d
„ d
ejƒZdS )zK
The full encoder-decoder model, built on top of the base seq2seq modules.
é    N)Únn)Úutils)ÚLSTMAttention©ÚBeam)ÚUNK_IDÚstanzac                       sz   e Zd ZdZd‡ fdd„	Zdd„ Zdd„ Zd	d
„ Zdd„ Zddd„Z	dd„ Z
ddd„Zdd„ Zddd„Zddd„Z‡  ZS )ÚSeq2SeqModelzÈ
    A complete encoder-decoder model, with optional attention.

    A parent class which makes use of the contextual_embedding (such as a charlm)
    can make use of unsaved_modules when saving.
    Nc                    s†  t ƒ  ¡  g | _|d | _|d | _|d | _|d | _| dd¡| _|d | _	t
j| _|d | _| d	d
¡| _|| _|| _|  d|¡ t d¡ t d¡ d| _| jd | _| j| _| dd¡| _| dd¡| _| dd¡| _| dd¡| _| dd¡| _| dd¡| _| dd¡| _t | j¡| _ t | j	¡| _!t "| j| j| j¡| _#| j| _$| j%d ur¸|  j$| j% ¡ 7  _$tj&| j$| j| jdd| jdkrÊ| j	ndd| _'t(| j| jd| jd d| _)t *| j| j¡| _+| jr| jdkrt d¡ t "| j| j| j¡| _,t | j¡| _-| jr$| jd }t .t *| j|¡t /¡ t *|| j¡¡| _0| jr0t *| jd¡| _1t2 3t
j4g¡}|  5d|¡ |  6¡  d S )NÚ
vocab_sizeÚemb_dimÚ
hidden_dimÚ
num_layersÚemb_dropoutg        ÚdropoutÚmax_dec_lenÚtopg    _ BÚcontextual_embeddingz(Building an attentional Seq2Seq model...zUsing a Bi-LSTM encoderé   ÚposFÚpos_dimr   Úpos_vocab_sizeÚpos_dropoutÚeditÚnum_editÚcopyTé   )ÚbidirectionalÚbatch_firstr   Ú	attn_type)r   r   zUsing POS in encoderÚ
SOS_tensor)7ÚsuperÚ__init__Úunsaved_modulesr
   r   r   ÚnlayersÚgetr   r   ÚconstantÚPAD_IDÚ	pad_tokenr   r   ÚargsÚ
emb_matrixÚadd_unsaved_moduleÚloggerÚdebugÚnum_directionsÚenc_hidden_dimÚdec_hidden_dimÚuse_posr   r   r   r   r   r   r   ÚDropoutÚemb_dropÚdropÚ	EmbeddingÚ	embeddingÚ	input_dimr   ÚLSTMÚencoderr   ÚdecoderÚLinearÚ	dec2vocabÚpos_embeddingÚpos_dropÚ
SequentialÚReLUÚedit_clfÚ	copy_gateÚtorchÚ
LongTensorÚSOS_IDÚregister_bufferÚinit_weights)Úselfr(   r)   r   Úedit_hiddenr   ©Ú	__class__© ú]/var/www/html/env_mimamsha/lib/python3.10/site-packages/stanza/models/common/seq2seq_model.pyr!      sn   









ÿ

ÿ

ýzSeq2SeqModel.__init__c                 C   s    |  j |g7  _ t| ||ƒ d S ©N)r"   Úsetattr)rG   ÚnameÚmodulerK   rK   rL   r*   X   s   zSeq2SeqModel.add_unsaved_modulec                    s   t j}ˆ jd ur5tˆ jtjƒrt ˆ j¡ˆ _ˆ j ¡ ˆ j	ˆ j
fks+J d ˆ j	ˆ j
¡ƒ‚ˆ jjj ˆ j¡ n
ˆ jjj | |¡ ˆ jdkrOt d¡ dˆ jj_n ˆ jˆ j	k rjt d ˆ j¡¡ ˆ jj ‡ fdd„¡ nt d¡ ˆ jr~ˆ jjj | |¡ d S d S )	Nz/Input embedding matrix must match size: {} x {}r   z Do not finetune embedding layer.FzFinetune top {} embeddings.c                    s   t  | ˆ j¡S rM   )r   Úkeep_partial_gradr   )Úx©rG   rK   rL   Ú<lambda>m   s    z+Seq2SeqModel.init_weights.<locals>.<lambda>zFinetune all embeddings.)r%   ÚEMB_INIT_RANGEr)   Ú
isinstanceÚnpÚndarrayrB   Ú
from_numpyÚsizer
   r   Úformatr5   ÚweightÚdataÚcopy_Úuniform_r   r+   r,   Úrequires_gradÚregister_hookr0   r<   )rG   Ú
init_rangerK   rS   rL   rF   \   s&   
ÿ


ÿzSeq2SeqModel.init_weightsc                 C   sV   |  d¡}| jj}tj| jjd || jd|d}tj| jjd || jd|d}||fS )Nr   r   F)r`   Údevice)rZ   r   rc   rB   Úzerosr8   r   r.   )rG   ÚinputsÚ
batch_sizerc   Úh0Úc0rK   rK   rL   Ú
zero_statet   s
   
zSeq2SeqModel.zero_statec                 C   sŒ   |   |¡\}}tjjj||dd}|  |||f¡\}\}}tjjj|dd\}	}
t |d |d fd¡}t |d |d fd¡}|	||ffS )z Encode source sequence. T)r   éÿÿÿÿéþÿÿÿr   )	ri   r   r   ÚrnnÚpack_padded_sequencer8   Úpad_packed_sequencerB   Úcat)rG   Ú
enc_inputsÚlensrg   rh   Úpacked_inputsÚpacked_h_inÚhnÚcnÚh_inÚ_rK   rK   rL   Úencode{   s   zSeq2SeqModel.encodeFc                 C   sh  ||f}| j ||||| jd}	| jr|	\}
}}n|	\}
}|
 ¡  |
 d¡|
 d¡ d¡}|  |¡}| |
 d¡|
 d¡d¡}|  |¡}| jr |  |
¡}| jr\|dd…dd…dd…f }t	 
|d¡}t	jj |¡| }|jdddd }|| }t	 |¡}t| ¡ ƒ}t	 |¡|d kr•t	 |¡d |d< | |¡}| d¡ | d¡| d¡| d¡¡}| d||¡}|dk}t	 | |d¡¡| }| |d	¡}t	 dt	 |¡ ¡ }|jd |d k r| |¡}||dd…dd…d|jd …f< |dd…dd…tf  d
¡|dd…dd…|jd d…f< |}|| }t	 t	 ||g¡d¡}|r0tdƒ|dd…dd…tf< ||fS )zD Decode a step, based on context encoding and source context states.)Úreturn_logattnr   r   rj   NT©Úkeepdimgê-™—q=g   ¢”mÂr   z-inf)r9   r   Ú
contiguousÚviewrZ   r;   Úget_log_probrA   r0   rB   Úlog_softmaxr   Ú
functionalÚ
logsigmoidÚmaxÚexpÚlistÚ	new_zerosÚ	unsqueezeÚexpandÚscatter_addÚlogÚmasked_fillÚshaper   Ú	logsumexpÚstackÚfloat)rG   Ú
dec_inputsrt   ru   ÚctxÚctx_maskÚsrcÚnever_decode_unkÚ
dec_hiddenÚdecoder_outputÚh_outÚlog_attnÚh_out_reshapeÚdecoder_logitsÚ	log_probsÚ
copy_logitÚlog_copy_probÚmxÚ	copy_probÚcopied_vocab_shapeÚcopied_vocab_probÚscattered_copyÚ	zero_maskÚlog_copied_vocab_probÚlog_nocopy_probÚnew_log_probsrK   rK   rL   Údecode†   sL    




&
 8zSeq2SeqModel.decodec                 C   s  |  ¡ }t||| jk< |  |  |¡¡}| d¡}| jrG|d us#J dƒ‚|  |  |¡¡}t	j
| d¡|gdd}| |dg¡}	t	j
|	|gdd}|d urx| jd urx|  |¡}
| jro|
 |
jd d|
jd f¡}t	j
|
|gdd}
t	j
||
gdd}t|j tj¡ ¡  d¡ƒ}||||fS )Nr   z)Missing POS input for seq2seq lemmatizer.r   ©Údimr   )Úcloner   r
   r2   r5   rZ   r0   r=   r<   rB   ro   r†   r…   r   r‹   r„   r]   Úeqr%   r&   ÚlongÚsum)rG   r’   Úsrc_maskr   ÚrawÚ	embed_srcrp   rf   Ú
pos_inputsÚpos_src_maskÚ
raw_inputsÚ	raw_zerosÚsrc_lensrK   rK   rL   ÚembedÕ   s$   

zSeq2SeqModel.embedc                 C   st   |   ||||¡\}}}}|  ||¡\}	\}
}| jr|  |
¡}nd }|  |  |¡¡}| j||
||	||d\}}||fS )N)r’   )rµ   rx   r   r@   r2   r5   r¦   )rG   r’   r­   Útgt_inr   r®   rp   rf   r´   rv   rt   ru   Úedit_logitsr   rš   rw   rK   rK   rL   Úforwardé   s   zSeq2SeqModel.forwardc                 C   sL   |  d| j¡}tj|dd}| ¡ dkr|S |  | d¡| d¡| d¡¡S )Nrj   r   r§   r   r   )r}   r
   ÚFr   r¨   rZ   )rG   ÚlogitsÚlogits_reshaperš   rK   rK   rL   r~   ú   s
    zSeq2SeqModel.get_log_probc              	   C   sˆ  |   ||||¡\}}}}|  ||¡\}	\}
}| jr|  |
¡}nd}|  | j¡}| || d¡| d¡¡}dd„ t|ƒD ƒ}d}d}dd„ t|ƒD ƒ}||k rÀ|| j	k rÀ| j
||
||	|||d\}\}
}| d¡dksnJ dƒ‚| d¡jdd	d
\}}| ¡ }t||| jk< |  |¡}|d7 }t|ƒD ]$}|| s¶|j| d  ¡ }|tjkr¯d	||< |d7 }q’||  |¡ q’||k rÀ|| j	k sS||fS )z Predict with greedy decoding. Nr   r   c                 S   s   g | ]}d ‘qS )FrK   ©Ú.0rw   rK   rK   rL   Ú
<listcomp>  ó    z/Seq2SeqModel.predict_greedy.<locals>.<listcomp>c                 S   s   g | ]}g ‘qS rK   rK   r¼   rK   rK   rL   r¾     r¿   ©r’   r“   z"Output must have 1-step of output.Trz   )rµ   rx   r   r@   r5   r   r‡   rZ   Úranger   r¦   Úsqueezer‚   r©   r   r
   r]   Úitemr%   ÚEOS_IDÚappend)rG   r’   r­   r   r®   r“   rp   rf   r´   rv   rt   ru   r·   r   ÚdoneÚ
total_doneÚmax_lenÚoutput_seqsrš   rw   ÚpredsÚiÚtokenrK   rK   rL   Úpredict_greedy  s:    


€ïzSeq2SeqModel.predict_greedyé   c              
      s@  ˆ dkr| j |||||dS |  ||||¡\}}}	}|  ||	¡\}
\}}| jr-|  |¡}nd}t ¡ $ |
j ˆ dd¡}
| ˆ d¡}|j ˆ d¡}|j ˆ d¡}W d  ƒ n1 sZw   Y  | j	j
‰‡ ‡fdd„t|ƒD ƒ}dd„ }t| jƒD ]l}t dd„ |D ƒ¡ ¡  ¡  d	d¡}t||| jk< |  |¡}| j||||
|||d
\}\}}| ˆ |d	¡ dd¡ ¡ }g }t|ƒD ] }||  |j| ¡}|rÎ||g7 }|||f|||  ¡ ˆ ƒ q»t|ƒ|krä nqxg g }}t|ƒD ]-}||  ¡ \}}||d g7 }|d }||  |¡}t |¡}dd„ |D ƒ}||g7 }qî||fS )z Predict with beam search. r   )r“   Nc                    s   g | ]}t ˆ ˆƒ‘qS rK   r   r¼   ©Ú	beam_sizerc   rK   rL   r¾   A  s    z(Seq2SeqModel.predict.<locals>.<listcomp>c                 S   sR   | D ]$}|  ¡ \}}| ¡  ||| |¡dd…|f }|j |j d|¡¡ qdS )z/ Select the states according to back pointers. Nr   )rZ   r|   r}   r]   r^   Úindex_select)ÚstatesÚidxÚ	positionsrÐ   ÚeÚbrÚdÚsrK   rK   rL   Úupdate_stateC  s
   "ýz*Seq2SeqModel.predict.<locals>.update_statec                 S   ó   g | ]}|  ¡ ‘qS rK   )Úget_current_state)r½   ÚbrK   rK   rL   r¾   L  ó    rj   rÀ   r   c                 S   rÚ   rK   )rÃ   )r½   rË   rK   rK   rL   r¾   h  rÝ   )rÍ   rµ   rx   r   r@   rB   Úno_gradr]   Úrepeatr   rc   rÁ   r   r   Útr|   r}   r   r
   r5   r¦   Ú	transposeÚadvanceÚget_current_originÚlenÚ	sort_bestÚget_hypr   Ú	prune_hyp)rG   r’   r­   r   rÐ   r®   r“   rp   rf   r´   rv   rt   ru   r·   ÚbeamrÙ   rË   r   rš   rÆ   rÜ   Úis_doneÚall_hypÚ
all_scoresÚscoresÚksÚkÚhyprK   rÏ   rL   Úpredict*  sR   
û$
 
ÿ

zSeq2SeqModel.predict)NN)NNF)NrÎ   NF)Ú__name__Ú
__module__Ú__qualname__Ú__doc__r!   r*   rF   ri   rx   r¦   rµ   r¸   r~   rÍ   rð   Ú__classcell__rK   rK   rI   rL   r	      s    >
O

)r	   )rô   ÚloggingrB   r   Útorch.nn.functionalr€   r¹   ÚnumpyrW   Ú%stanza.models.common.seq2seq_constantÚmodelsÚcommonÚseq2seq_constantr%   Ústanza.models.commonr   Ú$stanza.models.common.seq2seq_modulesr   Ústanza.models.common.beamr   r   Ú	getLoggerr+   ÚModuler	   rK   rK   rK   rL   Ú<module>   s    
