o
    –h9  ã                   @   s
  d dl Z d dlZd dlZd dlZd dlmZ d dlm  mZ	 d dl
mZmZmZmZmZ d dlmZmZ d dlmZmZ d dlmZ d dlmZmZ d dlmZmZ d dlmZ d d	l m!Z! d d
l"m#Z# d dl$m%Z%m&Z&m'Z' d dl(m)Z) e *d¡Z+G dd„ dej,ƒZ-dS )é    N)Úpad_packed_sequenceÚpack_padded_sequenceÚpack_sequenceÚpad_sequenceÚPackedSequence)Ú
map_to_idsÚget_long_tensor)ÚForwardCharlmNotFoundErrorÚBackwardCharlmNotFoundError)Ú
PackedLSTM)ÚWordDropoutÚLockedDropout)ÚCharacterModelÚCharacterLanguageModel©ÚCRFLoss)Ú	load_bert)Úattach_bert_model)ÚPAD_IDÚUNK_IDÚEMPTY_ID)Úextract_bert_embeddingsÚstanzac                       sJ   e Zd Zd‡ fdd„	Zdd„ Zdd„ Zd	d
„ Zdd„ Zedd„ ƒZ	‡  Z
S )Ú	NERTaggerNFc	                    s<  t ƒ  ¡  |ˆ _|ˆ _g ˆ _d}	ˆ jd dkr|ˆ j dd¡}
t tˆ jd ƒˆ jd t	¡}|
r4dˆ jv rAˆ  
d|¡ ˆ jj ¡  n|ˆ _|d urMˆ  |¡ d ˆ _dˆ jv rut tˆ jd ƒˆ jd t	¡ˆ _tj ˆ jj¡ |
suˆ jj ¡  |	ˆ jd 7 }	|ˆ _tˆ ||ˆ j dd	¡|ƒ ˆ j d
d ¡r·| dd	¡r­tj|d dd	dˆ _tj ˆ jj¡ nd ˆ _|	ˆ jjj7 }	ˆ jd r8ˆ jd dkr8ˆ jd r&|d d u sÙtj |d ¡såtd |d ¡|d ƒ‚|d d u sótj |d ¡sÿtd |d ¡|d ƒ‚ˆ  
dtj |d d	d¡ ˆ  
dtj |d d	d¡ |	ˆ j! "¡ ˆ j# "¡  7 }	nt$||dd	dˆ _%|	ˆ jd d 7 }	ˆ j dd	¡rHt |	|	¡ˆ _&nd ˆ _&t'|	ˆ jd ˆ jd ddˆ jd dkradnˆ jd dˆ _(d ˆ _)tj*t+ ,dˆ jd  dˆ jd ¡d	d ˆ _-tj*t+ ,dˆ jd  dˆ jd ¡d	d ˆ _.ˆ jd!  /¡ }t|ƒˆ _0ˆ j d"¡rát ˆ jd d |d ¡g}t1|d d#… |dd … ƒD ]\}}| 2t ˆ jd d | |¡¡ qÄt 3|¡ˆ _4nt 3‡ fd$d%„|D ƒ¡ˆ _4ˆ j4D ]	}|j5j6 7¡  qñt 3d&d%„ |D ƒ¡ˆ _8t 9|d ¡ˆ _:t;|d' ƒˆ _<t=|d( ƒˆ _>d S ))Nr   Úword_emb_dimÚemb_finetuneTÚwordÚdeltaÚword_embÚuse_peftFÚ
bert_modelÚbert_hidden_layersé   )ÚbiasÚcharÚchar_emb_dimÚcharlmÚcharlm_forward_filezUCould not find forward character model: {}  Please specify with --charlm_forward_fileÚcharlm_backward_filezWCould not find backward character model: {}  Please specify with --charlm_backward_fileÚcharmodel_forward)ÚfinetuneÚcharmodel_backward)ÚbidirectionalÚ	attentionÚchar_hidden_dimé   Úinput_transformÚ
hidden_dimÚ
num_layersÚdropout)Úbatch_firstr,   r3   )Úrequires_gradÚtagÚconnect_output_layerséÿÿÿÿc                    s"   g | ]}t  ˆ jd  d |¡‘qS )r1   r/   )ÚnnÚLinearÚargs©Ú.0Únum_tag©Úself© úR/var/www/html/env_mimamsha/lib/python3.10/site-packages/stanza/models/ner/model.pyÚ
<listcomp>y   s   " z&NERTagger.__init__.<locals>.<listcomp>c                 S   s   g | ]}t |ƒ‘qS rA   r   r<   rA   rA   rB   rC   |   ó    Úword_dropoutÚlocked_dropout)?ÚsuperÚ__init__Úvocabr;   Úunsaved_modulesÚgetr9   Ú	EmbeddingÚlenr   Úadd_unsaved_moduler   ÚweightÚdetach_Úinit_embÚ	delta_embÚinitÚzeros_Ú	peft_namer   r:   Úbert_layer_mixr    ÚconfigÚhidden_sizeÚosÚpathÚexistsr	   Úformatr
   r   Úloadr)   r1   r+   r   Ú	charmodelr0   r   Ú
taggerlstmÚdrop_replacementÚ	ParameterÚtorchÚzerosÚtaggerlstm_h_initÚtaggerlstm_c_initÚlensÚnum_output_layersÚzipÚappendÚ
ModuleListÚtag_clfsr#   ÚdataÚzero_ÚcritsÚDropoutÚdropr   Úworddropr   Ú
lockeddrop)r@   r;   rI   Ú
emb_matrixÚfoundation_cacher    Úbert_tokenizerÚforce_bert_savedrU   Ú
input_sizer   r   Útag_lengthsrk   Úprev_lengthÚnext_lengthÚtag_clf©Ú	__class__r?   rB   rH      s~   


 ÿ**
"$
zNERTagger.__init__c                 C   sh   t |tjƒrt |¡}t| jd ƒ}| jd }| ¡ ||fks*J d 	||| ¡ ¡ƒ‚| j
jj |¡ d S )Nr   r   z9Input embedding matrix must match size: {} x {}, found {})Ú
isinstanceÚnpÚndarrayrb   Ú
from_numpyrM   rI   r;   Úsizer\   r   rO   rl   Úcopy_)r@   rs   Ú
vocab_sizeÚdimrA   rA   rB   rQ   ‚   s   

ÿzNERTagger.init_embc                 C   s    |  j |g7  _ t| ||ƒ d S ©N)rJ   Úsetattr)r@   ÚnameÚmodulerA   rA   rB   rN   ‹   s   zNERTagger.add_unsaved_modulec                 C   s`   dg}|   ¡ D ]\}}|jr%| d¡d dvr%| d|t |¡ ¡ f ¡ qt d 	|¡¡ d S )NzNORMS FOR MODEL PARAMTERSÚ.r   )r)   r+   z	  %s %.6gÚ
)
Únamed_parametersr5   Úsplitri   rb   ÚnormÚitemÚloggerÚinfoÚjoin)r@   Úlinesrˆ   ÚparamrA   rA   rB   Ú	log_norms   s   €zNERTagger.log_normsc           ,         s4  t ˆ  ¡ ƒj}‡fdd„}g }t|ƒ}ˆ jd dkrxˆ  ˆ j|ˆ jd ¡\}}| |¡}| |¡}ˆ  |¡}dˆ jv roˆ j	d uroˆ  ˆ j|ˆ jd ¡\}}| |¡}t
 |t¡}t
 |t¡}|| }t||< ˆ  	|¡}|| }||ƒ‰|ˆg7 }ˆ jd urÁt ˆ  ¡ ƒj}tˆ jd ˆ jˆ j||dˆ jd ur™ˆ jjnd ˆ j d	d¡ ˆ jd
	}ˆ jd ur´‡ fdd„|D ƒ}t|dd}|||ƒg7 }‡fdd„}ˆ jd r!ˆ jd dkr!ˆ j dd ¡rˆ j |d |	d |
|¡}t|j|jƒ}ˆ j |d |	d |
|¡}t|j|jƒ}|||g7 }nˆ  |||ˆ|¡}t|j|jƒ}||g7 }t
 dd„ |D ƒd¡}ˆ jd dkr;ˆ  |ˆ j ¡}ˆ  !|¡}||ƒ}ˆ  "|¡}||ƒj}ˆ j#rWˆ  #|¡}t||d jƒ}ˆ j$|ˆˆ j% &dˆ jd  |ˆ jd ¡ '¡ ˆ j( &dˆ jd  |ˆ jd ¡ '¡ fd\}}|j}ˆ  !|¡}||ƒ}ˆ  "|¡}||ƒj}d} g }!g }"t)t*ˆ j+ˆ j,ƒƒD ]d\}#\}$}%ˆ j d¡rÂ|#dkrË||$|ƒƒ '¡ }&n||&ƒj}'t
j||'gdd}(||$|(ƒƒ '¡ }&t
 |d d …d d …|#f t-¡})|%|&t
 .|)|¡|d d …d d …|#f ƒ\}*}+| |* } |! /|&¡ |" /|+¡ q°| |!|"fS )Nc                    s   t | ˆ ddS )NT©r4   )r   ©Úx)ÚsentlensrA   rB   Úpack™   s   zNERTagger.forward.<locals>.packr   r   r   r   r    FÚbert_finetune)Úkeep_endpointsr2   ÚdetachrU   c                    s0   g | ]}ˆ   |¡ d ¡|jd dˆ j j  ‘qS )r/   ©Úaxis)rV   ÚsqueezeÚsumÚin_features)r=   Úfeaturer?   rA   rB   rC   Ç   s   0 z%NERTagger.forward.<locals>.<listcomp>Tr–   c                    s   t t| ˆ jƒddd S )NTr–   r   )r   r   Úbatch_sizesr—   )r   rA   rB   ÚpadÌ   s   zNERTagger.forward.<locals>.padr$   r%   r&   r"   c                 S   s   g | ]}|j ‘qS rA   )rl   )r=   r˜   rA   rA   rB   rC   Û   s    rE   r/   r2   r1   )Úhxr7   rž   )0ÚnextÚ
parametersÚdevicerM   r;   Úextract_static_embeddingsrI   Útor   rR   rb   Úeqr   Ú	not_equalr   r    r   ru   rV   r¢   rK   rU   r   r)   Úget_representationr   rl   r¤   r+   r^   Úcatrq   r`   rp   rr   r0   r_   rd   ÚexpandÚ
contiguousre   Ú	enumeraterh   rk   rn   r   Ú
bitwise_orri   ),r@   Ú	sentencesÚ	wordcharsÚwordchars_maskÚtagsÚword_orig_idxr™   ÚwordlensÚcharsÚcharoffsetsÚcharlensÚchar_orig_idxr©   rš   ÚinputsÚ
batch_sizeÚstatic_wordsÚ	word_maskÚword_static_embÚdelta_wordsÚ_Údelta_unk_maskÚstatic_unk_maskÚunk_maskrR   Úprocessed_bertr¥   Úchar_reps_forwardÚchar_reps_backwardÚ	char_repsÚlstm_inputsÚlstm_outputsÚlossÚlogitsÚtransÚidxr{   ÚcritÚnext_logitsÚpacked_logitsÚinput_logitsÚtag_maskÚ	next_lossÚ
next_transrA   )r@   r™   r   rB   Úforward–   s˜   



	


ý





""þ



*

zNERTagger.forwardc           	         s~   g }|   dd¡rdd„ ‰ ndd„ ‰ t|ƒD ]\}}| ‡ fdd„|D ƒ¡g}| |d ¡ qt|t|ƒƒ}t |t¡}||fS )	NÚ	lowercaseTc                 S   s   |   ¡ S r†   )Úlowerr—   rA   rA   rB   Ú<lambda>  s    z5NERTagger.extract_static_embeddings.<locals>.<lambda>c                 S   s   | S r†   rA   r—   rA   rA   rB   rÜ     s    c                    s   g | ]}ˆ |ƒ‘qS rA   rA   )r=   Úw©ÚcaserA   rB   rC     rD   z7NERTagger.extract_static_embeddings.<locals>.<listcomp>r   )	rK   r²   Úmapri   r   rM   rb   r¬   r   )	r;   ÚsentsrI   Ú	processedrÑ   ÚsentÚprocessed_sentÚwordsÚ
words_maskrA   rÞ   rB   rª     s   
z#NERTagger.extract_static_embeddings)NNNNFN)Ú__name__Ú
__module__Ú__qualname__rH   rQ   rN   r•   rÙ   Ústaticmethodrª   Ú__classcell__rA   rA   r|   rB   r      s    h	qr   ).rY   ÚloggingÚnumpyr   rb   Útorch.nnr9   Útorch.nn.functionalÚ
functionalÚFÚtorch.nn.utils.rnnr   r   r   r   r   Ústanza.models.common.datar   r   Ústanza.models.common.exceptionsr	   r
   Ú stanza.models.common.packed_lstmr   Ústanza.models.common.dropoutr   r   Ústanza.models.common.char_modelr   r   Ústanza.models.common.crfr   Ú%stanza.models.common.foundation_cacher   Ústanza.models.common.utilsr   Ústanza.models.common.vocabr   r   r   Ú#stanza.models.common.bert_embeddingr   Ú	getLoggerr   ÚModuler   rA   rA   rA   rB   Ú<module>   s&    
