o
    –hCy  ã                   @   s  d dl Zd dlZd dlZd dlZd dlmZ d dlmZ d dl	m  m
Z
 ejZG dd„ dƒZG dd„ dejjjƒZG dd„ dejƒZG d	d
„ d
ejƒZG dd„ dejƒZG dd„ dejƒZG dd„ dejƒZG dd„ dejƒZG dd„ dejƒZG dd„ dejƒZdS )é    N)ÚVariablec                   @   s   e Zd ZdZdd„ ZdS )ÚBatchIndiceszJ
    Batch indices container class (used to implement packed batches)
    c                 C   sÄ   || _ tj|tj|d| _tdt |¡ ƒ| _t 	dg|dgg¡}t 
|dd … |d d… k¡d | _| jdd … | jd d…  | _t| jƒ| jksLJ ‚tt | jdd … | jd d…  ¡ƒ| _d S )N)ÚdtypeÚdeviceé   éÿÿÿÿr   )Úbatch_idxs_npÚtorchÚ	as_tensorÚlongÚbatch_idxs_torchÚintÚnpÚmaxÚ
batch_sizeÚconcatenateÚnonzeroÚboundaries_npÚseq_lens_npÚlenÚmax_len)Úselfr   r   Úbatch_idxs_np_extra© r   úe/var/www/html/env_mimamsha/lib/python3.10/site-packages/stanza/models/constituency/label_attention.pyÚ__init__   s   $,zBatchIndices.__init__N)Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   r   r   r   r   r      s    r   c                   @   s&   e Zd Zeddd„ƒZedd„ ƒZdS )	ÚFeatureDropoutFunctionç      à?Fc                 C   sÔ   |dk s|dkrt d |¡ƒ‚||_||_||_|jr#| |¡ |}n| ¡ }|jdkrh|jrh| ¡  |j	| 
d¡¡|_|jdkrH|j d¡ n|j d|j ¡ d|j ¡ |j|jd d …f |_| |j¡ |S ©Nr   r   z9dropout probability has to be between 0 and 1, but got {})Ú
ValueErrorÚformatÚpÚtrainÚinplaceÚ
mark_dirtyÚcloneÚnewÚresize_r   ÚsizeÚnoiseÚfill_Ú
bernoulli_Údiv_r   Úmul_)ÚclsÚctxÚinputÚ
batch_idxsr%   r&   r'   Úoutputr   r   r   Úforward%   s&   ÿ

zFeatureDropoutFunction.forwardc                 C   s4   | j dkr| jr| | j¡d d d d fS |d d d d fS )Nr   )r%   r&   Úmulr-   )r3   Úgrad_outputr   r   r   Úbackward@   s   zFeatureDropoutFunction.backwardN)r!   FF)r   r   r   Úclassmethodr7   Ústaticmethodr:   r   r   r   r   r    $   s
    r    c                       s*   e Zd ZdZd‡ fdd„	Zdd„ Z‡  ZS )	ÚFeatureDropoutzå
    Feature-level dropout: takes an input of size len x num_features and drops
    each feature with probabibility p. A feature is dropped across the full
    portion of the input that corresponds to a single batch element.
    r!   Fc                    s8   t ƒ  ¡  |dk s|dkrtd |¡ƒ‚|| _|| _d S r"   )Úsuperr   r#   r$   r%   r'   )r   r%   r'   ©Ú	__class__r   r   r   N   s   
ÿ
zFeatureDropout.__init__c                 C   s   t  ||| j| j| j¡S ©N)r    Úapplyr%   Útrainingr'   )r   r4   r5   r   r   r   r7   V   s   zFeatureDropout.forward)r!   F©r   r   r   r   r   r7   Ú__classcell__r   r   r?   r   r=   H   s    r=   c                       s&   e Zd Zd‡ fdd„	Zdd„ Z‡  ZS )ÚLayerNormalizationçü©ñÒMbP?Tc                    sT   t t| ƒ ¡  || _|| _| jr(tjt |¡dd| _	tjt 
|¡dd| _d S d S )NT©Úrequires_grad)r>   rF   r   ÚepsÚaffineÚnnÚ	Parameterr	   ÚonesÚa_2ÚzerosÚb_2)r   Úd_hidrJ   rK   r?   r   r   r   \   s   þzLayerNormalization.__init__c                 C   sv   |  d¡dkr	|S tj|ddd}tj|ddd}|| |¡ | |¡| j  }| jr9|| j |¡ | j |¡ }|S )Nr   r   T)ÚkeepdimÚdim)	r,   r	   ÚmeanÚstdÚ	expand_asrJ   rK   rO   rQ   )r   ÚzÚmuÚsigmaÚln_outr   r   r   r7   e   s   zLayerNormalization.forward)rG   T©r   r   r   r   r7   rE   r   r   r?   r   rF   [   s    	rF   c                       s(   e Zd Zd‡ fdd„	Zddd„Z‡  ZS )	ÚScaledDotProductAttentionçš™™™™™¹?c                    s6   t t| ƒ ¡  |d | _t |¡| _tjdd| _d S )Nr!   r   ©rT   )	r>   r]   r   ÚtemperrL   ÚDropoutÚdropoutÚSoftmaxÚsoftmax)r   Úd_modelÚattention_dropoutr?   r   r   r   t   s   
z"ScaledDotProductAttention.__init__Nc                 C   s†   t  || dd¡¡| j }|d ur/| ¡ | ¡ ks%J d | ¡ | ¡ ¡ƒ‚|j |tdƒ ¡ |  	|¡}|  
|¡}t  ||¡}||fS )Nr   é   zFAttention mask shape {} mismatch with Attention logit tensor shape {}.Úinf)r	   ÚbmmÚ	transposer`   r,   r$   ÚdataÚmasked_fill_Úfloatrd   rb   )r   ÚqÚkÚvÚ	attn_maskÚattnr6   r   r   r   r7   z   s   ý

z!ScaledDotProductAttention.forward)r^   rA   r\   r   r   r?   r   r]   s   s    r]   c                       sF   e Zd ZdZd‡ fdd„	Zddd„Zdd	„ Zd
d„ Zddd„Z‡  Z	S )ÚMultiHeadAttentionz%
    Multi-head attention module
    r^   Nc                    s  t t| ƒ ¡  || _|| _|| _|sd| _nd| _| jr|| | _|| _t	 
t || j|d ¡¡| _t	 
t || j|d ¡¡| _t	 
t || j|d ¡¡| _t	 
t || j|d ¡¡| _t	 
t || j|d ¡¡| _t	 
t || j|d ¡¡| _t | j¡ t | j¡ t | j¡ t | j¡ t | j¡ t | j¡ n3t	 
t |||¡¡| _t	 
t |||¡¡| _t	 
t |||¡¡| _t | j¡ t | j¡ t | j¡ t||d| _t|ƒ| _| jsët	j|| |dd| _nt	j||d  | jdd| _t	j||d  | jdd| _t |ƒ| _!d S )NFTrg   ©rf   ©Úbias)"r>   rs   r   Ún_headÚd_kÚd_vÚpartitionedÚ	d_contentÚd_positionalrL   rM   r	   ÚFloatTensorÚw_qs1Úw_ks1Úw_vs1Úw_qs2Úw_ks2Úw_vs2ÚinitÚxavier_normal_Úw_qsÚw_ksÚw_vsr]   Ú	attentionrF   Ú
layer_normÚLinearÚprojÚproj1Úproj2r=   Úresidual_dropout)r   rw   re   rx   ry   r   rf   r|   r?   r   r   r   ž   sF   

zMultiHeadAttention.__init__c              
   C   s†  |  | jd¡ | jd| d¡¡}|d u r|}n|  | jd¡ | jd| d¡¡}| js@t || j¡}t || j¡}t || j	¡}n~t 
t |d d …d d …d | j…f | j¡t |d d …d d …| jd …f | j¡gd¡}t 
t |d d …d d …d | j…f | j¡t |d d …d d …| jd …f | j¡gd¡}t 
t |d d …d d …d | j…f | j¡t |d d …d d …| jd …f | j¡gd¡}|||fS )Nr   r   )Úrepeatrw   Úviewr,   rz   r	   ri   r†   r‡   rˆ   Úcatr{   r~   r   r   r‚   r€   rƒ   )r   ÚinpÚqk_inpÚv_inp_repeatedÚqk_inp_repeatedÚq_sÚk_sÚv_sr   r   r   Úsplit_qkv_packedÓ   s6     $$þý$$þý$$þý
z#MultiHeadAttention.split_qkv_packedc                 C   s¤  | j }| j| j}}|j}|j}	| ||	||f¡}
| ||	||f¡}| ||	||f¡}|j|	|ftd}tt	|j
d d… |j
dd … ƒƒD ]d\}\}}|d d …||…d d …f |
d d …|d || …d d …f< |d d …||…d d …f |d d …|d || …d d …f< |d d …||…d d …f |d d …|d || …d d …f< ||d || …f  d¡ qE|
 d||¡| d||¡| d||¡| d¡ |	||¡ |dd¡|  |d¡fS )N©r   r   r   F)rw   rx   ry   r   r   Ú	new_zerosÚnew_onesÚDTYPEÚ	enumerateÚzipr   r.   r‘   Ú	unsqueezeÚexpandr   )r   r—   r˜   r™   r5   rw   rx   ry   Ú
len_paddedÚmb_sizeÚq_paddedÚk_paddedÚv_paddedÚinvalid_maskÚiÚstartÚendr   r   r   Úpad_and_rearrangeí   s&   .666ûz$MultiHeadAttention.pad_and_rearrangec                 C   sä   | j }| |d| j¡}| js%t |dd¡ ¡  d|| j ¡}|  |¡}|S | jd }|d d …d d …d |…f }|d d …d d …|d …f }t |dd¡ ¡  d|| ¡}t |dd¡ ¡  d|| ¡}t |  	|¡|  
|¡gd¡}|S ©Nr   r   r   rg   )rw   r‘   ry   rz   r	   rj   Ú
contiguousrŒ   r’   r   rŽ   )r   Úoutputsrw   Úd_v1Úoutputs1Úoutputs2r   r   r   Ú	combine_v	  s$    

öþýzMultiHeadAttention.combine_vc                 C   sx   |}| j ||d\}}}|  ||||¡\}}	}
}}| j||	|
|d\}}|| }|  |¡}|  ||¡}|  || ¡|fS )N)r”   ©rq   )rš   r¬   r‰   r³   r   rŠ   )r   r“   r5   r”   Úresidualr—   r˜   r™   r¥   r¦   r§   rq   Úoutput_maskÚoutputs_paddedÚattns_paddedr¯   r   r   r   r7   !  s   
þ
zMultiHeadAttention.forward)r^   r^   NrA   ©
r   r   r   r   r   rš   r¬   r³   r7   rE   r   r   r?   r   rs   ™   s    
5rs   c                       s*   e Zd ZdZd‡ fdd„	Zdd„ Z‡  ZS )ÚPositionwiseFeedForwardzˆ
    A position-wise feed forward module.

    Projects to a higher-dimensional space before applying ReLU, then projects
    back.
    r^   c                    sV   t t| ƒ ¡  t ||¡| _t ||¡| _t|ƒ| _t	|ƒ| _
t	|ƒ| _t ¡ | _d S rA   )r>   rº   r   rL   r‹   Úw_1Úw_2rF   rŠ   r=   Úrelu_dropoutr   ÚReLUÚrelu)r   rR   Úd_ffr½   r   r?   r   r   r   C  s   


z PositionwiseFeedForward.__init__c                 C   sD   |}|   |¡}|  |  |¡|¡}|  |¡}|  ||¡}|  || ¡S rA   )r»   r½   r¿   r¼   r   rŠ   )r   Úxr5   rµ   r6   r   r   r   r7   N  s   

zPositionwiseFeedForward.forward©r^   r^   rD   r   r   r?   r   rº   ;  s    rº   c                       s&   e Zd Zd‡ fdd„	Zdd„ Z‡  ZS )Ú"PartitionedPositionwiseFeedForwardr^   c                    sŒ   t ƒ  ¡  || | _t | j|d ¡| _t ||d ¡| _t |d | j¡| _t |d |¡| _t	|ƒ| _
t|ƒ| _t|ƒ| _t ¡ | _d S )Nrg   )r>   r   r{   rL   r‹   Úw_1cÚw_1pÚw_2cÚw_2prF   rŠ   r=   r½   r   r¾   r¿   )r   rR   rÀ   r|   r½   r   r?   r   r   r   Z  s   




z+PartitionedPositionwiseFeedForward.__init__c           	      C   s¦   |}|d d …d | j …f }|d d …| j d …f }|  |¡}|  |  |¡|¡}|  |¡}|  |¡}|  |  |¡|¡}|  |¡}t ||gd¡}|  	||¡}|  
|| ¡S )Nr   )r{   rÄ   r½   r¿   rÆ   rÅ   rÇ   r	   r’   r   rŠ   )	r   rÁ   r5   rµ   ÚxcÚxpÚoutputcÚoutputpr6   r   r   r   r7   f  s   



z*PartitionedPositionwiseFeedForward.forwardrÂ   r\   r   r   r?   r   rÃ   Y  s    rÃ   c                       sF   e Zd ZdZd‡ fdd„	Zddd	„Zd
d„ Zdd„ Zddd„Z‡  Z	S )ÚLabelAttentionzH
    Single-head Attention layer for label-specific representations
    TFr^   Nc                    sˆ  t t| ƒ ¡  || _|| _|| _|| _|| _|| _|| _	|| _
|s%d| _nd| _| jrð||kr7td||f ƒ‚|| | _|| _| j	rTtjt | j| j|d ¡dd| _ntjt | j|d ¡dd| _tjt | j| j|d ¡dd| _tjt | j| j|d ¡dd| _| j	rštjt | j| j|d ¡dd| _ntjt | j|d ¡dd| _tjt | j| j|d ¡dd| _tjt | j| j|d ¡dd| _t | j¡ t | j¡ t | j¡ t | j¡ t | j¡ t | j¡ nN| j	rtjt | j||¡dd| _ntjt | j|¡dd| _tjt | j||¡dd| _tjt | j||¡dd| _t | j¡ t | j¡ t | j¡ t||
d| _| j
rOt |ƒ| _!nt | jƒ| _!| jst| j
rjtj"| j| |dd| _#nEtj"||dd| _#n;| j
r—tj"| j|d  | jdd| _$tj"| j|d  | jdd| _%ntj"|d | jdd| _$tj"|d | jdd| _%| j
s½tj"|| jdd| _&t'|	ƒ| _(d S )NFTz>Unable to build LabelAttention.  d_model %d <= d_positional %drg   rH   rt   ru   ))r>   rÌ   r   rx   ry   Úd_lre   Úd_projÚuse_resdropÚq_as_matrixÚcombine_as_selfrz   r#   r{   r|   rL   rM   r	   r}   r~   r   r€   r   r‚   rƒ   r„   r…   r†   r‡   rˆ   r]   r‰   rF   rŠ   r‹   rŒ   r   rŽ   Úreduce_projr=   r   )r   re   rx   ry   rÍ   rÎ   rÑ   rÏ   rÐ   r   rf   r|   r?   r   r   r   }  sr   
$""$"" zLabelAttention.__init__c           	   
   C   sÌ  |  d¡}| | jd¡ | jd|  d¡¡}|d u r|}n| | jd¡ | jd|  d¡¡}| jsO| jr:t || j¡}n| j 	d¡}t || j
¡}t || j¡}n’| jr}t t |d d …d d …d | j…f | j¡t |d d …d d …| jd …f | j¡gd¡}nt | j 	d¡| j 	d¡gd¡}t t |d d …d d …d | j…f | j¡t |d d …d d …| jd …f | j¡gd¡}t t |d d …d d …d | j…f | j¡t |d d …d d …| jd …f | j¡gd¡}|||fS )Nr   r   r   )r,   r   rÍ   r‘   rz   rÐ   r	   ri   r†   r¡   r‡   rˆ   r’   r{   r~   r   r   r‚   r€   rƒ   )	r   r“   Úk_inpÚlen_inpr•   Úk_inp_repeatedr—   r˜   r™   r   r   r   rš   Í  sJ   
  $$þý

þý$$þý$$þý
zLabelAttention.split_qkv_packedc                 C   sè  | j }| j| j}}|j}|j}	| jr| ||	||f¡}
n| |	dd¡}
| ||	||f¡}| ||	||f¡}|j|	|ft	d}t
t|jd d… |jdd … ƒƒD ]g\}\}}| jrt|d d …||…d d …f |
d d …|d || …d d …f< |d d …||…d d …f |d d …|d || …d d …f< |d d …||…d d …f |d d …|d || …d d …f< ||d || …f  d¡ qP| jrÒ|
 d||¡}
| d¡ |	||¡ |dd¡}n
| d¡ |dd¡}|  |d¡}|
| d||¡| d||¡||fS )Nr   r›   r   F)rÍ   rx   ry   r   r   rÐ   rœ   r   r   rž   rŸ   r    r   r.   r‘   r¡   r¢   )r   r—   r˜   r™   r5   rw   rx   ry   r£   r¤   r¥   r¦   r§   r¨   r©   rª   r«   rq   r¶   r   r   r   r¬   ñ  s6   .666 ûz LabelAttention.pad_and_rearrangec                 C   s  | j }| |d| j¡}| js0| jr"t |dd¡ ¡  d|| j ¡}nt |dd¡}|  |¡}|S | jd }|d d …d d …d |…f }|d d …d d …|d …f }| jrqt |dd¡ ¡  d|| ¡}t |dd¡ ¡  d|| ¡}nt |dd¡}t |dd¡}t 	|  
|¡|  |¡gd¡}|S r­   )rÍ   r‘   ry   rz   rÑ   r	   rj   r®   rŒ   r’   r   rŽ   )r   r¯   rÍ   r°   r±   r²   r   r   r   r³     s.   "

ò þýzLabelAttention.combine_vc                    s@  |}|  d¡}ˆj||d\}}}ˆ |||ˆ ¡\}	}
}}}ˆj|	|
||d\}}ˆjs6| d|  d¡d¡}|| ‰ˆ ˆ¡‰ˆjr^ˆjrLˆ 	ˆˆ ¡‰nt
 ‡ ‡‡fdd„tˆjƒD ƒd¡‰ˆjrlˆ ˆ| ¡‰ˆ|fS tˆjƒD ]}ˆd d …|d d …f | ˆd d …|d d …f< qqˆ ˆ¡‰ˆ ˆ¡‰ˆ |d¡ ¡ ‰ˆ|fS )Nr   )rÓ   r´   r   r   c              	      s0   g | ]}ˆ  ˆd d …|d d …f ˆ ¡ d¡‘qS )Nr   )r   r¡   ©Ú.0r©   ©r5   r¯   r   r   r   Ú
<listcomp>\  s   0 z*LabelAttention.forward.<locals>.<listcomp>)r,   rš   r¬   r‰   rÐ   r   r³   rÏ   rÑ   r   r	   r’   ÚrangerÍ   rŠ   rÒ   r‘   r®   )r   r“   r5   rÓ   rµ   rÔ   r—   r˜   r™   r¥   r¦   r§   rq   r¶   r·   r¸   Úlr   rØ   r   r7   7  s2   

þ
$	ù.

zLabelAttention.forward)TFr^   r^   NrA   r¹   r   r   r?   r   rÌ   x  s    
P$(rÌ   c                       s:   e Zd ZdZ								d‡ fdd	„	Zd
d„ Z‡  ZS )ÚLabelAttentionModulez½
    Label Attention Module for label-specific representations
    The module can be used right after the Partitioned Attention, or it can be experimented with for the transition stack
    TFr^   Né   çš™™™™™É?c                    sÐ   t ƒ  ¡  || | _|sd| _n|r|nd| _|r9|| jkr'td|| jf ƒ‚tj|| j || j dd| _|}nd | _|}t||||||||	|
|| jƒ| _	|s[t
| j|||
ƒ| _d S t| j|| j||
ƒ| _d S )Nr   zRIllegal argument for d_input_proj: d_input_proj %d is smaller than d_positional %dFru   )r>   r   Úff_dimr|   r#   rL   r‹   Úinput_projectionrÌ   Úlabel_attentionrº   Úlal_ffrÃ   )r   re   Úd_input_projrx   ry   rÍ   rÎ   rÑ   rÏ   rÐ   r   rf   r|   rÀ   r½   Úlattn_partitionedÚd_inputr?   r   r   r   q  sH   


ö
ý
üzLabelAttentionModule.__init__c                    sv  ˆ j rˆ jdkr‡ fdd„|D ƒ}n	‡ fdd„|D ƒ}tdd„ |D ƒƒ}tj|td}t|ƒ}d}dg| }t|ƒD ]\}}	t|	ƒ||< |	D ]
}
|||< |d7 }qFq:|}t||d j	ƒ}g }t|ƒD ]\}}t|ƒD ]\}}||| k ry| 
|¡ qjqbt |¡}ˆ  ||¡\}}ˆ  ||¡}d	d„ t|ƒD ƒ}t|ƒD ]\}}|||   
|¡ q›t|ƒD ]\}}t |¡||< q­|S )
Nr   c              	      sL   g | ]"}t jˆ  |d d …d ˆ j …f ¡|d d …ˆ j d …f fdd‘qS )Nr   r_   )r	   r’   rà   r|   ©r×   Úsentence©r   r   r   rÙ   ­  s     þÿ
ÿz0LabelAttentionModule.forward.<locals>.<listcomp>c                    s   g | ]}ˆ   |¡‘qS r   )rà   ræ   rè   r   r   rÙ   ±  s    c                 s   s    | ]}|j d  V  qdS )r   N)Úshaperæ   r   r   r   Ú	<genexpr>³  s   € z/LabelAttentionModule.forward.<locals>.<genexpr>r›   r   c                 S   s   g | ]}g ‘qS r   r   rÖ   r   r   r   rÙ   Í  s    )rà   r|   Úsumr   rP   r   r   rŸ   r   r   Úappendr	   Ústackrá   râ   rÚ   )r   Úword_embeddingsÚtagged_word_listsÚ
packed_lenr5   r   r©   Úsentence_lengthsÚsentence_idxrç   ÚwordÚbatch_indicesÚ
new_embedsÚbatchÚword_idxÚembedÚnew_word_embeddingsÚlabeled_representationsÚ_Úfinal_labeled_representationsÚidxÚrepresentationr   rè   r   r7   ª  sF   

þ

þ
€þ
zLabelAttentionModule.forward)TFr^   r^   NrÝ   rÞ   TrD   r   r   r?   r   rÜ   k  s    ñ9rÜ   )Únumpyr   Ú	functoolsÚsysr	   Útorch.autogradr   Útorch.nnrL   Útorch.nn.initr„   Úboolrž   r   ÚautogradÚfunctionÚInplaceFunctionr    ÚModuler=   rF   r]   rs   rº   rÃ   rÌ   rÜ   r   r   r   r   Ú<module>   s(    $& # t