o
    –h€  ã                   @   s@   d Z ddlZddlmZ ddlm  mZ G dd„ dejƒZdS )zÄ
Copied from

https://jaketae.github.io/study/relative-positional-encoding/

Based on work from

Shaw et al: https://arxiv.org/pdf/1803.02155.pdf
Huang et al: https://arxiv.org/pdf/1809.04281.pdf
é    N)Únnc                       s.   e Zd Zd	‡ fdd„	Zdd„ Zdd„ Z‡  ZS )
ÚRelativeGlobalAttentioné   çš™™™™™¹?c              	      sª   t ƒ  ¡  t||ƒ\}}|rtdƒ‚|| _|| _|| _t ||¡| _	t ||¡| _
t ||¡| _t |¡| _t t ||¡¡| _|  dt t ||¡¡ d¡ d¡¡ d S )Nz&incompatible `d_model` and `num_heads`Úmaskr   )ÚsuperÚ__init__ÚdivmodÚ
ValueErrorÚmax_lenÚd_modelÚ	num_headsr   ÚLinearÚkeyÚvalueÚqueryÚDropoutÚdropoutÚ	ParameterÚtorchÚrandnÚErÚregister_bufferÚtrilÚonesÚ	unsqueeze)Úselfr   r   r   r   Úd_headÚ	remainder©Ú	__class__© úd/var/www/html/env_mimamsha/lib/python3.10/site-packages/stanza/models/common/relative_global_attn.pyr      s&   
ÿýz RelativeGlobalAttention.__init__c                 C   sV  |j \}}}|| jkrtdƒ‚|  |¡ ||| jd¡ dddd¡}|  |¡ ||| jd¡ dd¡}|  	|¡ ||| jd¡ dd¡}| j| }| j
|d …d d …f  dd¡}	t ||	¡}
|  |
¡}t ||¡}|| | d¡d  }| jd d …d d …d |…d |…f }| |dktdƒ¡}tj|dd	}t ||¡}| dd¡}| ||d¡}|  |¡S )
Nz&sequence length exceeds model capacityéÿÿÿÿr   é   é   é   g      à?z-inf)Údim)Úshaper   r
   r   Úreshaper   Úpermuter   Ú	transposer   r   r   ÚmatmulÚskewÚsizer   Úmasked_fillÚfloatÚFÚsoftmaxr   )r   ÚxÚ
batch_sizeÚseq_lenÚ_Úk_tÚvÚqÚstartÚEr_tÚQErÚSrelÚQK_tÚattnr   Úoutr!   r!   r"   Úforward(   s*   
ÿ$  

"
zRelativeGlobalAttention.forwardc           	      C   sN   t  |d¡}|j\}}}}| ||||¡}|d d …d d …dd …d d …f }|S )N)r&   r   r&   )r1   Úpadr(   r)   )	r   r<   Úpaddedr4   r   Únum_rowsÚnum_colsÚreshapedr=   r!   r!   r"   r-   P   s
    zRelativeGlobalAttention.skew)r   r   )Ú__name__Ú
__module__Ú__qualname__r   rA   r-   Ú__classcell__r!   r!   r   r"   r      s    (r   )Ú__doc__r   r   Útorch.nn.functionalÚ
functionalr1   ÚModuler   r!   r!   r!   r"   Ú<module>   s
    