o
    h?                     @   s   d Z ddlZddlmZ ddlmZ ddlmZ ddlm	Z	 ddl
mZ ddlT dd	lmZmZ dd
lmZ dZeedG dd deZdS )z-
Processor for performing dependency parsing
    N)doc)unsortVOCAB_PREFIX)
DataLoader)Trainer)*)UDProcessorregister_processor)ConverterDepparse   )namec                       sT   e Zd ZeegZeeeegZ	 fddZ
dd Zdd Zdd Zd	d
 Z  ZS )DepparseProcessorc                    s   d | _ t ||| d S N)
_pretaggedsuper__init__)selfconfigpipelinedevice	__class__ ]/var/www/html/env_mimamsha/lib/python3.10/site-packages/stanza/pipeline/depparse_processor.pyr      s   zDepparseProcessor.__init__c                 C   s.   | j d| _| jrt | _d S | jj| _d S )N	pretagged)_configgetr   set	_requiresr   REQUIRES_DEFAULT)r   r   r   r   _set_up_requires    s   z"DepparseProcessor._set_up_requiresc                 C   sX   d|v r|j |d nd | _|dd |dd d}t|| j|d ||j d| _d S )Npretrain_pathforward_charlm_pathbackward_charlm_path)charlm_forward_filecharlm_backward_file
model_path)argspretrain
model_filer   foundation_cache)r+   load_pretrain	_pretrainr   r   r)   _trainer)r   r   r   r   r(   r   r   r   _set_up_model'   s
   

 zDepparseProcessor._set_up_modelc                 C   s   dd | j d j D }|S )zM
        Return a list of relations which this processor can produce
        c                 S   s   g | ]}|t vr|qS r   r   ).0kr   r   r   
<listcomp>1   s    z9DepparseProcessor.get_known_relations.<locals>.<listcomp>deprel)vocab_unit2idkeys)r   r6   r   r   r   get_known_relations-   s   z%DepparseProcessor.get_known_relationsc           	      C   sL  t | dr| j|S tdd |jD rtdzmt|| jd | j| j| j	d| j
dd| j
dtd	}t  g }t|D ]\}}|| j|7 }qAW d    n1 sXw   Y  |jd urht||j}|jtjtjfd
d |D  |jjD ]}|  q||jW S  ty } zt|drt|d }t|| d }~ww )N_variantc                 s   s0    | ]}|j D ]}|jd u o|jd u V  qqd S r   )wordsuposxpos)r0   sentencewordr   r   r   	<genexpr>8   s   . z,DepparseProcessor.process.<locals>.<genexpr>zPOS not run before depparse!
batch_sizeTsort_during_evalmin_length_to_batch_separately)r4   
evaluationr@   rA   c                 S   s   g | ]	}|D ]}|qqS r   r   )r0   xyr   r   r   r2   D   s    z-DepparseProcessor.process.<locals>.<listcomp>z%CUDA out of memory. Tried to allocatez ... You may be able to compensate for this by separating long sentences into their own batch with a parameter such as depparse_min_length_to_batch_separately=150 or by limiting the overall batch size with depparse_batch_size=400.)hasattrr8   processany	sentences
ValueErrorr   r   r)   r4   r   DEFAULT_SEPARATE_BATCHtorchno_grad	enumeratetrainerpredictdata_orig_idxr   r   r   HEADDEPRELbuild_dependenciesRuntimeErrorstr
startswith)	r   documentbatchpredsibr<   enew_messager   r   r   rF   4   s8   


 

zDepparseProcessor.process)__name__
__module____qualname__r   DEPPARSEPROVIDES_DEFAULTTOKENIZEPOSLEMMAr    r   r!   r/   r7   rF   __classcell__r   r   r   r   r      s    
r   )__doc__rK   stanza.models.commonr   stanza.models.common.utilsr   stanza.models.common.vocabr   stanza.models.depparse.datar   stanza.models.depparse.trainerr   stanza.pipeline._constantsstanza.pipeline.processorr	   r
   3stanza.pipeline.external.corenlp_converter_depparser   rJ   ra   r   r   r   r   r   <module>   s    