o
    h|                     @   s   d Z ddlZddlZddlmZ ddlmZmZ ddlm	Z	 ddl
mZ ddlmZ ddlmZ dd	lT dd
lmZmZ edZeedG dd deZdS )z0
Processor for performing named entity tagging.
    N)doc)ForwardCharlmNotFoundErrorBackwardCharlmNotFoundError)unsort)
DataLoader)Trainer
merge_tags)*)UDProcessorregister_processorstanza)namec                       sn   e Zd ZeegZeegZdd Zdd Z	dd Z
dd Z fd	d
Zdd Z fddZdddZ  ZS )NERProcessorc                    sN   |  d }|d ur|d}dd |D }|S  fdd| dg D }|S )N;c                 S   s   g | ]}|r|nd qS )N .0xr   r   X/var/www/html/env_mimamsha/lib/python3.10/site-packages/stanza/pipeline/ner_processor.py
<listcomp>    s    z2NERProcessor._get_dependencies.<locals>.<listcomp>c                    s   g | ]}|  qS r   )getr   dep_namer   r   r   "   s    dependencies)r   split)selfconfigr   r   r   r   r   _get_dependencies   s   
zNERProcessor._get_dependenciesc                 C   s  | d}t|tr|d}| |d}| |d}| |d}i | _| dd }|rNt|tr7|| jd< n|d}t|D ]\}	}
|
rMt|
| j|	< q@g | _t	||||D ]n\}}}}t
d|||| |ro|j|nd }||d	}| j t| jd }|d ur||d< zt|||||jd
}W n+ ty } z
td|j |jd d }~w ty } z
td|j |jd d }~ww | j| qX| jd | _|| _d S )N
model_pathr   forward_charlm_pathbackward_charlm_pathpretrain_pathpredict_tagsetr   zBLoading %s with pretrain %s, forward charlm %s, backward charlm %s)charlm_forward_filecharlm_backward_file)args
model_filepretraindevicefoundation_cachezcCould not find the forward charlm %s.  Please specify the correct path with ner_forward_charlm_pathzeCould not find the backward charlm %s.  Please specify the correct path with ner_backward_charlm_path)r   
isinstancestrr   r   _predict_tagsetint	enumeratetrainersziploggerdebugr*   load_pretrainlenr   r   filenamer   append_trainermodel_paths)r   r   pipeliner)   r9   charlm_forward_filescharlm_backward_filespretrain_filesr#   	piece_idxpiecer   r"   charlm_forwardcharlm_backwardr(   r&   trainerer   r   r   _set_up_model%   sL   





zNERProcessor._set_up_modelc                 C   st   t | jdkrtd| jd j| _g | _| jD ]}|j}dd | D }|| | j	| q| jd | _
dS )zV Finalize the configurations for this processor, based off of values from a UD model. r   z#Somehow there are no models loaded!c                 S   s    i | ]\}}t |s||qS r   )r   filter_out_option)r   kvr   r   r   
<dictcomp>]   s     z5NERProcessor._set_up_final_config.<locals>.<dictcomp>N)r5   r0   RuntimeErrorvocab_vocabconfigsr&   itemsupdater7   _config)r   r   rB   loaded_argsr   r   r   _set_up_final_configS   s   

z!NERProcessor._set_up_final_configc                 C   s   dd | j S )NzNERProcessor(%s)r   )joinr9   r   r   r   r   __str__b   s   zNERProcessor.__str__c                    s   t    d| _dS )ze Drop memory intensive resources if keeping this processor around for reasons other than running it. N)supermark_inactiver0   rS   	__class__r   r   rV   e   s   

zNERProcessor.mark_inactivec           
      C   s  t  = g }t| j| jD ]+\}}t||d ||jdd|jjd}g }t	|D ]\}}||
|7 }q(|| qW d    n1 sDw   Y  dd t| D }|jjtjgdd |D dd |jjtjgd	d t| D dd t|j }	t|	 d
 |jS )N
batch_sizeTF)rJ   
evaluationpreprocess_tagsbert_tokenizerc                 S   s   g | ]}t | qS r   r   r   r   r   r   r   w   s    z(NERProcessor.process.<locals>.<listcomp>c                 S   s   g | ]	}|D ]}|qqS r   r   r   r   yr   r   r   r   x   s    )to_tokenc                 S   s"   g | ]}t | D ]}t|qqS r   )r1   tupler]   r   r   r   r   y   s   " z entities found in document.)torchno_gradr1   r0   rL   r   rJ   modelr\   r/   predictr7   r   setNER	MULTI_NERr5   
build_entsr2   r3   )
r   document	all_predsrB   r   batchpredsibtotalr   r   r   processj   s    
  $zNERProcessor.processc                    s"   t  |}|D ]}|  q|S )zL
        NER processor has a collation step after running inference
        )rU   bulk_processrh   )r   docsr   rW   r   r   rq      s   
zNERProcessor.bulk_processr   c                 C   s   | j |  S )z
        Return the tags known by this model

        Removes the S-, B-, etc, and does not include O
        Specify model_idx if the processor  has more than one model
        )r0   get_known_tags)r   	model_idxr   r   r   rs      s   zNERProcessor.get_known_tags)r   )__name__
__module____qualname__re   rf   PROVIDES_DEFAULTTOKENIZEREQUIRES_DEFAULTr   rD   rQ   rT   rV   rp   rq   rs   __classcell__r   r   rW   r   r      s    

	.	r   )__doc__ra   loggingstanza.models.commonr   stanza.models.common.exceptionsr   r   stanza.models.common.utilsr   stanza.models.ner.datar   stanza.models.ner.trainerr   stanza.models.ner.utilsr	   stanza.pipeline._constantsstanza.pipeline.processorr   r   	getLoggerr2   rf   r   r   r   r   r   <module>   s    
