o
    h~=                     @   s   d Z ddlmZ ddlmZ ddlZddlZddlmZ ddl	m
Z
mZmZmZ ddlmZ ddlmZ ddlmZmZmZ dd	lmZ dd
lmZmZ G dd dejZdddZdZdZG dd dejZ G dd dejZ!G dd dZ"dS )a  
Based on

@inproceedings{akbik-etal-2018-contextual,
    title = "Contextual String Embeddings for Sequence Labeling",
    author = "Akbik, Alan  and
      Blythe, Duncan  and
      Vollgraf, Roland",
    booktitle = "Proceedings of the 27th International Conference on Computational Linguistics",
    month = aug,
    year = "2018",
    address = "Santa Fe, New Mexico, USA",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/C18-1139",
    pages = "1638--1649",
}
    )Counter)
itemgetterN)pack_sequencepad_packed_sequencepack_padded_sequencePackedSequence)get_long_tensor)
PackedLSTM)open_read_texttensor_unsortunsort)SequenceUnitDropout)UNK_ID	CharVocabc                       s&   e Zd Zd fdd	Zdd Z  ZS )CharacterModelFTc              	      s&  t    || _|| _|rdnd| _|| _tjt|d | jd dd| _	| jr>tj
| j| jd  ddd	| _| jjj  t| jd | jd | jd
 d| jd
 dkrUdn|d | jd |d| _tt| j| jd
  d| jd | _tt| j| jd
  d| jd | _t|d | _d S )N      charchar_emb_dimr   padding_idxchar_hidden_dimF)biaschar_num_layersTdropoutchar_rec_dropoutbatch_firstr   rec_dropoutbidirectional)super__init__argspadnum_dirattnnn	Embeddinglenchar_embLinear	char_attnweightdatazero_r	   charlstm	Parametertorchzeroscharlstm_h_initcharlstm_c_initDropoutr   )selfr"   vocabr#   r   	attention	__class__ Z/var/www/html/env_mimamsha/lib/python3.10/site-packages/stanza/models/common/char_model.pyr!   "   s   
 "((zCharacterModel.__init__c              
   C   s2  |  | |}|d}t||dd}| j||| j| j| jd  || jd 	 | j
| j| jd  || jd 	 fd}| jrj|d }	t| |  |	j}
t|	j|
 |	j}	t|	dd\}	}|	d}n|d \}}|dd  dd	 |d	}t||}t||}| jrt|ddd }|S )
Nr   Tr   r   r   hxr   )r   r)   sizer   r/   r3   expandr$   r"   
contiguousr4   r%   r1   sigmoidr+   r-   r   batch_sizesr   sum	transposeviewr   r   splitr#   )r6   chars
chars_maskword_orig_idxsentlenswordlensembs
batch_sizeoutput	char_repsweights_reshcr;   r;   r<   forward7   s(   
$$ 
zCharacterModel.forward)FFT)__name__
__module____qualname__r!   rY   __classcell__r;   r;   r9   r<   r   !   s    r   c           
   	   C   s  t  }tj| rtt| }ntj| d g}tj| d } |D ])}tj| |}t|}|D ]	}|	t
| q4W d   n1 sHw   Y  q$t|dkrXtdt
| D ]}|| |k ri||= q^tdd | D g}t|d dkrtdt|}	|	S )a<  
    Build a vocab for a CharacterLanguageModel

    Requires a large amount of memory, but only need to build once

    here we need some trick to deal with excessively large files
    for each file we accumulate the counter of characters, and
    at the end we simply pass a list of chars to the vocab builder
    r   r   NzTraining data was empty!c                 S      g | ]}|d  qS r   r;   .0xr;   r;   r<   
<listcomp>p       z&build_charlm_vocab.<locals>.<listcomp>zEAll characters in the training data were less frequent than --cutoff!)r   ospathisdirsortedlistdirrJ   joinr
   updatelistr(   
ValueErrorkeysmost_commonr   )
rf   cutoffcounter	filenamesfilenamefinlinekr-   r7   r;   r;   r<   build_charlm_vocabR   s0   

rw   
 c                       s   e Zd Zd fdd	ZdddZdd	 Zd
d Zdd Zdd Zdd Z	d fdd	Z
dd Zdd ZedddZedddZ  ZS ) CharacterLanguageModelFTc              	      s  t    || _|| _|| _|| _d| _tjt	| jd | jd d d| _
t| jd | jd | jd d| jd dkr<dn|d	 | jd
 dd| _tt| jd d| jd | _tt| jd d| jd | _t| jd t	| jd | _t|d	 | _t|ddt| _d S )NTr   r   r   r   r   r   r   char_dropoutr   Fr   char_unit_dropout)r    r!   r"   r7   is_forward_lmr#   finetuner&   r'   r(   r)   r	   r/   r0   r1   r2   r3   r4   r*   decoderr5   r   r   getr   r{   )r6   r"   r7   r#   r}   r9   r;   r<   r!   {   s   
""""zCharacterLanguageModel.__init__Nc                 C   s   |  |}| | |}|d}t||dd}|d u r=| j| jd || jd  | j	| jd || jd  f}| j
|||d\}}| t|ddd }| |}|||fS )Nr   Tr=   r   r   r>   )r{   r   r)   rB   r   r3   rC   r"   rD   r4   r/   r   r   )r6   rK   charlenshiddenrP   rQ   rR   decodedr;   r;   r<   rY      s   



zCharacterLanguageModel.forwardc                    s   t  9 | ||\ }} fddt|D }t||}t|}| jr5t|ddd }W d    |S W d    |S 1 s@w   Y  |S )Nc                       g | ]
\}} ||f qS r;   r;   ra   ioffsetsrR   r;   r<   rc          z=CharacterLanguageModel.get_representation.<locals>.<listcomp>Tr=   r   )r1   no_gradrY   	enumerater   r   r#   r   )r6   rK   charoffsetsr   char_orig_idxrU   rV   r;   r   r<   get_representation   s   



z)CharacterLanguageModel.get_representationc           	         s   t |  j}|    fddt|D }|jtddd dd |D }dd |D }t|t| 	t
dj|d	}t & | ||\}}}d
d t||D }t|dd |D }W d    |S 1 sjw   Y  |S )Nc                    s$   g | ]\}}  |t||fqS r;   )mapr(   )ra   idxwordr7   r;   r<   rc         $ zBCharacterLanguageModel.per_char_representation.<locals>.<listcomp>r   Tkeyreversec                 S   r^   r_   r;   r`   r;   r;   r<   rc      rd   c                 S   r^   )r   r;   r`   r;   r;   r<   rc      rd   pad_iddevicec                 S   s$   g | ]\}}|d |d d f qS Nr;   )ra   rb   yr;   r;   r<   rc      r   c                 S   r^   )r   r;   r`   r;   r;   r<   rc      rd   )next
parametersr   
char_vocabr   sortr   r   r(   unit2id
CHARLM_ENDtor1   r   rY   zipr   )	r6   wordsr   all_datarK   	char_lenschar_tensorrR   rU   r;   r   r<   per_char_representation   s    

z.CharacterLanguageModel.per_char_representationc                    sX  | j }|  }t|  j}g }t|D ]D\}}|s#dd t|D }tg}g }	|D ]}
||
 |	t
 |		t|d  q*|sF|	  ||}|	||	t|t|f q|jtddd tt| \}}}}t|t||t
dj|d}t " | ||\ }} fd	dt|D }t||}W d
   |S 1 sw   Y  |S )a  
        Return values from this charlm for a list of list of words

        input: [[str]]
          K sentences, each of length Ki (can be different for each sentence)
        output: [tensor(Ki x dim)]
          list of tensors, each one with shape Ki by the dim of the character model

        Values are taken from the last character in a word for each word.
        The words are effectively treated as if they are whitespace separated
        (which may actually be somewhat inaccurate for languages such as Chinese or for MWT)
        c                 S   s   g | ]	}|d d d qS )NrA   r;   r`   r;   r;   r<   rc      s    zDCharacterLanguageModel.build_char_representation.<locals>.<listcomp>r   r   Tr   r   r   c                    r   r;   r;   r   r   r;   r<   rc      r   N)r}   r   r   r   r   r   reversedCHARLM_STARTextendappendr   r(   r   r   r   r   tupler   r   r   r   r1   r   rY   r   )r6   	sentencesrY   r7   r   r   r   r   rK   r   wchar_offsetsr   orig_idxrU   rV   r;   r   r<   build_char_representation   s8   


 

z0CharacterLanguageModel.build_char_representationc                 C   
   | j d S )Nr   )r"   r6   r;   r;   r<   
hidden_dim      
z!CharacterLanguageModel.hidden_dimc                 C   r   )Nr   r   r   r;   r;   r<   r      r   z!CharacterLanguageModel.char_vocabc                    s.   |s
t  | dS | jrt  | dS dS )z
        Override the default train() function, so that when self.finetune == False, the training mode 
        won't be impacted by the parent models' status change.
        N)r    trainr~   )r6   moder9   r;   r<   r      s
   zCharacterLanguageModel.trainc                 C   s(   | j d  | j|  | j| jd}|S )Nr   )r7   r"   
state_dictr#   r}   )r7   r   r"   r#   r}   )r6   stater;   r;   r<   
full_state   s   z!CharacterLanguageModel.full_statec                 C   s6   t jt j|d dd |  }tj||dd d S )Nr   Texist_okF_use_new_zipfile_serialization)re   makedirsrf   rJ   r   r1   save)r6   rs   r   r;   r;   r<   r      s   zCharacterLanguageModel.savec                 C   sL   dt |d i}| |d ||d |d }||d  |  ||_|S )Nr   r7   r"   r#   r}   r   )r   load_state_dictevalr~   )clsr   r~   r7   modelr;   r;   r<   from_full_state  s   z&CharacterLanguageModel.from_full_statec                 C   s8   t j|dd dd}d|v r| ||S | |d |S )Nc                 S      | S r   r;   storagelocr;   r;   r<   <lambda>      z-CharacterLanguageModel.load.<locals>.<lambda>Tweights_onlyr   r   )r1   loadr   )r   rs   r~   r   r;   r;   r<   r     s   zCharacterLanguageModel.load)FTr   TF)rZ   r[   r\   r!   rY   r   r   r   r   r   r   r   r   classmethodr   r   r]   r;   r;   r9   r<   rz   y   s    

-
rz   c                       s0   e Zd ZdZ fddZdd Zdd Z  ZS )!CharacterLanguageModelWordAdapterzT
    Adapts a character model to return embeddings for each character in a word
    c                    s   t    || _d S r   )r    r!   charlms)r6   r   r9   r;   r<   r!     s   

z*CharacterLanguageModelWordAdapter.__init__c                 C   s   dd |D }g }| j D ]A}||}tjt|tdd |D |d jd |d j|d jd}t	|D ]\}}|||d |jd d d f< q5|
| qtj|dd	}|S )
Nc                 S   s   g | ]}t | t qS r;   )r   r   r`   r;   r;   r<   rc     s    z=CharacterLanguageModelWordAdapter.forward.<locals>.<listcomp>c                 s   s    | ]}|j d  V  qdS )r   N)shaper`   r;   r;   r<   	<genexpr>#  s    z<CharacterLanguageModelWordAdapter.forward.<locals>.<genexpr>r   r   )dtyper   r   )dim)r   r   r1   r2   r(   maxr   r   r   r   r   cat)r6   r   padded_repscharlmrep
padded_repr   rowr;   r;   r<   rY     s   

<z)CharacterLanguageModelWordAdapter.forwardc                 C   s   t dd | jD S )Nc                 s   s    | ]}|  V  qd S r   )r   )ra   r   r;   r;   r<   r   +  s    z?CharacterLanguageModelWordAdapter.hidden_dim.<locals>.<genexpr>)rG   r   r   r;   r;   r<   r   *  s   z,CharacterLanguageModelWordAdapter.hidden_dim)rZ   r[   r\   __doc__r!   rY   r   r]   r;   r;   r9   r<   r     s
    r   c                   @   s:   e Zd ZdddZdddZedd	 ZedddZdS )CharacterLanguageModelTrainerr   r   c                 C   s.   || _ || _|| _|| _|| _|| _|| _d S r   )r   params	optimizer	criterion	schedulerepochglobal_step)r6   r   r   r   r   r   r   r   r;   r;   r<   r!   .  s   
z&CharacterLanguageModelTrainer.__init__Tc                 C   s   t jt j|d dd | j | j| jd}|r&| jd ur&| j	 |d< |r4| j
d ur4| j
	 |d< |rB| jd urB| j	 |d< tj||dd	 d S )
Nr   Tr   )r   r   r   r   r   r   Fr   )re   r   rf   rJ   r   r   r   r   r   r   r   r   r1   r   )r6   rs   fullr   r;   r;   r<   r   7  s   z"CharacterLanguageModelTrainer.savec                 C   s   t |||d dkrdndd}||d }dd | D }tjj||d	 |d
 |d d}tj }tjjj	|d|d |d d}| |||||S )N	directionrY   TF)r}   r   c                 S      g | ]}|j r|qS r;   requires_gradra   paramr;   r;   r<   rc   J      z@CharacterLanguageModelTrainer.from_new_model.<locals>.<listcomp>lr0momentumweight_decaylrr   r   annealpatienceverbosefactorr   )
rz   r   r   r1   optimSGDr&   CrossEntropyLosslr_schedulerReduceLROnPlateau)r   r"   r7   r   r   r   r   r   r;   r;   r<   from_new_modelF  s    
z,CharacterLanguageModelTrainer.from_new_modelFc                 C   s   t j|dd dd}t|d |}||d }dd | D }t jj||d	 |d
 |d d}d|v r=||d  t j	
 }d|v rM||d  t jjj|d|d |d d}	d|v rg|	|d  |dd}
|dd}| |||||	|
|S )z
        Load the model along with any other saved state for training

        Note that you MUST set finetune=True if planning to continue training
        Otherwise the only benefit you will get will be a warm GPU
        c                 S   r   r   r;   r   r;   r;   r<   r   Y  r   z4CharacterLanguageModelTrainer.load.<locals>.<lambda>Tr   r   r   c                 S   r   r;   r   r   r;   r;   r<   rc   ]  r   z6CharacterLanguageModelTrainer.load.<locals>.<listcomp>r   r   r   r   r   r   r   r   r   r   r   r   r   r   )r1   r   rz   r   r   r   r   r   r   r&   r   r   r   r   )r   r"   rs   r~   r   r   r   r   r   r   r   r   r;   r;   r<   r   Q  s    
z"CharacterLanguageModelTrainer.loadN)r   r   r   r   )rZ   r[   r\   r!   r   r   r   r   r;   r;   r;   r<   r   -  s    

	

r   r_   )#r   collectionsr   operatorr   re   r1   torch.nnr&   torch.nn.utils.rnnr   r   r   r   stanza.models.common.datar    stanza.models.common.packed_lstmr	   stanza.models.common.utilsr
   r   r   stanza.models.common.dropoutr   stanza.models.common.vocabr   r   Moduler   rw   r   r   rz   r   r   r;   r;   r;   r<   <module>   s(    
1$ 