o
    h*                     @   s   d Z ddlmZmZ ddlmZ ddlmZmZm	Z	 G dd de
ZG dd deZG d	d
 d
eZG dd deZG dd de
Zdd Zdd ZdS )z
Base classes for processors
    )ABCabstractmethod)Document)NAME_TO_PROCESSOR_CLASSPIPELINE_NAMESPROCESSOR_VARIANTSc                   @   sX   e Zd ZdZdd Zedd Zedd Zedd	 Zed
d Z	dd Z
dd ZdS )ProcessorRequirementsExceptionzA Exception indicating a processor's requirements will not be met c                 C   s(   || _ | j  || _|| _|   d S N)_err_processorerr_processormark_inactive_processors_list_provided_reqsbuild_message)selfprocessors_listr   provided_reqs r   T/var/www/html/env_mimamsha/lib/python3.10/site-packages/stanza/pipeline/processor.py__init__   s
   
z'ProcessorRequirementsException.__init__c                 C      | j S )z) The processor that raised the exception )r
   r   r   r   r   r         z,ProcessorRequirementsException.err_processorc                 C   s   t | jjS r	   )typer   __name__r   r   r   r   processor_type   s   z-ProcessorRequirementsException.processor_typec                 C   r   r	   )r   r   r   r   r   r         z.ProcessorRequirementsException.processors_listc                 C   r   r	   )r   r   r   r   r   r   "   r   z,ProcessorRequirementsException.provided_reqsc                 C   sN   d| j  dd| j d| jj d| jj| j d| jj| j  d| _d S )Nz----
Pipeline Requirements Error!
	Processor: z
	Pipeline processors list: ,z
	Processor Requirements: z
		- fulfilled: z
		- missing: z

The processors list provided for this pipeline is invalid.  Please make sure all prerequisites are met for every processor.

)r   joinr   r   requiresintersectionr   messager   r   r   r   r   &   s   
z,ProcessorRequirementsException.build_messagec                 C   r   r	   r!   r   r   r   r   __str__0      z&ProcessorRequirementsException.__str__N)r   
__module____qualname____doc__r   propertyr   r   r   r   r   r#   r   r   r   r   r   
   s    




r   c                   @   s   e Zd ZdZdd Zdd Zedd Zdd	 Zd
d Z	dd Z
dd Zdd Zedd Zedd Zedd Zedd Zdd ZdS )	Processorz Base class for all processors c                 C   s`   || _ || _| || |  s|   |   |   t| dr,| jj	r.| jj
| _
d S d S d S N_variant)_config	_pipeline_set_up_variants_set_up_variant_requires_set_up_requires_set_up_provides_check_requirementshasattrr+   OVERRIDEprocessr   configpipelinedevicer   r   r   r   7   s   zProcessor.__init__c                 C   s:   | j j}d}| jdur| jd}|du r|S d||S )zB
        Simple description of the processor: name(model)
        N
model_pathz{}({}))	__class__r   r,   getformat)r   namemodelr   r   r   r#   I   s   
zProcessor.__str__c                 C      dS )z> Process a Document.  This is the main method of a processor. Nr   r   docr   r   r   r5   W   s   zProcessor.processc                    s(   t  dr j|S  fdd|D S )h Process a list of Documents. This should be replaced with a more efficient implementation if possible. r+   c                       g | ]}  |qS r   r5   .0rB   r   r   r   
<listcomp>b       z*Processor.bulk_process.<locals>.<listcomp>)r3   r+   bulk_processr   docsr   r   r   rJ   \   s   
zProcessor.bulk_processc                 C      | j j| _dS )zf Set up what processor requirements this processor fulfills.  Default is to use a class defined list. N)r;   PROVIDES_DEFAULT	_providesr   r   r   r   r1   d      zProcessor._set_up_providesc                 C   rM   )zR Set up requirements for this processor.  Default is to use a class defined list. N)r;   REQUIRES_DEFAULT	_requiresr   r   r   r   r0   h   rP   zProcessor._set_up_requiresc                 C   sT   t | dsdS t | jdr| j  | jj| _dS t | jjdr(| jjj| _dS dS )z
        If this has a variant with its own requirements, use those instead

        Returns True iff the _requires is set from the _variant
        r+   Fr0   TrQ   )r3   r+   r0   rR   r;   rQ   r   r   r   r   r/   l   s   


z"Processor._set_up_variant_requiresc                    sd   t | jjd }t fddt| D r0d | _ fddt| D d }t| |  | _d S d S )Nr   c                 3   s"    | ]}  d | dV  qdS )with_FNr<   rG   variantr7   r   r   	<genexpr>   s     z-Processor._set_up_variants.<locals>.<genexpr>c                    s"   g | ]}  d | dr|qS )rS   FrT   rU   rW   r   r   rH      s   " z.Processor._set_up_variants.<locals>.<listcomp>)listr;   rN   anyr   _trainerr+   )r   r7   r9   processor_namevariant_namer   rW   r   r.   }   s   zProcessor._set_up_variantsc                 C   r   )z" Configurations for the processor )r,   r   r   r   r   r7      r   zProcessor.configc                 C   r   )z- The pipeline that this processor belongs to )r-   r   r   r   r   r8      r   zProcessor.pipelinec                 C   r   r	   )rO   r   r   r   r   provides   r   zProcessor.providesc                 C   r   r	   )rR   r   r   r   r   r      r   zProcessor.requiresc                 C   s`   | j dds	dS tjdd | jjD tg g  }| j| r.dd | jjD }t|| |dS )zg Given a list of fulfilled requirements, check if all of this processor's requirements are met or not. check_requirementsTNc                 S   s   g | ]}|j qS r   )r^   )rG   	processorr   r   r   rH      s    z1Processor._check_requirements.<locals>.<listcomp>c                 S   s   g | ]}|d  qS )r   r   )rG   itemr   r   r   rH      s    )	r7   r<   setunionr8   loaded_processorsr   	load_listr   )r   r   
load_namesr   r   r   r2      s   "
zProcessor._check_requirementsN)r   r%   r&   r'   r   r#   r   r5   rJ   r1   r0   r/   r.   r(   r7   r8   r^   r   r2   r   r   r   r   r)   4   s(    




r)   c                   @   s(   e Zd ZdZdZedd Zdd ZdS )ProcessorVariantz' Base class for all processor variants Fc                 C   r@   )a  
        Process a document that is potentially preprocessed by the processor.
        This is the main method of a processor variant.

        If `OVERRIDE` is set to True, all preprocessing by the processor would be bypassed, and the processor variant
        would serve as a drop-in replacement of the entire processor, and has to be able to interpret all the configs
        that are typically handled by the processor it replaces.
        Nr   rA   r   r   r   r5      s   
zProcessorVariant.processc                    s    fdd|D S )rC   c                    rD   r   rE   rF   r   r   r   rH      rI   z1ProcessorVariant.bulk_process.<locals>.<listcomp>r   rK   r   r   r   rJ      s   zProcessorVariant.bulk_processN)r   r%   r&   r'   r4   r   r5   rJ   r   r   r   r   rg      s    
rg   c                       st   e Zd ZdZ fddZedd Zdd Zdd	 Ze	d
d Z
e	dd Ze	dd Zedd Zdd Z  ZS )UDProcessorzb Base class for the neural UD Processors (tokenize,mwt,pos,lemma,depparse,sentiment,constituency) c                    sH   t  ||| d | _d | _d | _t| ds| ||| | | d S r*   )superr   	_pretrainr[   _vocabr3   _set_up_model_set_up_final_configr6   r;   r   r   r      s   
zUDProcessor.__init__c                 C   s   d S r	   r   r6   r   r   r   rl      s   zUDProcessor._set_up_modelc                 C   sJ   | j dur| j j| j j}| _dd | D }ni }|| || _dS )zV Finalize the configurations for this processor, based off of values from a UD model. Nc                 S   s    i | ]\}}t |s||qS r   )rh   filter_out_option)rG   kvr   r   r   
<dictcomp>   s     z4UDProcessor._set_up_final_config.<locals>.<dictcomp>)r[   argsvocabrk   itemsupdater,   )r   r7   loaded_argsr   r   r   rm      s   


z UDProcessor._set_up_final_configc                 C   s   d| _ d| _dS )ze Drop memory intensive resources if keeping this processor around for reasons other than running it. N)r[   rk   r   r   r   r   r      s   
zUDProcessor.mark_inactivec                 C   r   r	   )rj   r   r   r   r   pretrain   r   zUDProcessor.pretrainc                 C   r   r	   )r[   r   r   r   r   trainer   r   zUDProcessor.trainerc                 C   r   r	   )rk   r   r   r   r   rt      r   zUDProcessor.vocabc                 C   s0   g d}|  ds|  drdS | |v rdS dS )z) Filter out non-processor configurations )	r9   cpucudadev_conll_goldepochslangmode	save_name	shorthand_file_dirTF)endswith)optionoptions_to_filterr   r   r   ro      s   zUDProcessor.filter_out_optionc                 C   sh   t | dr| j|S dd |D }tg }||_tdd |D |_tdd |D |_| | |S )a  
        Most processors operate on the sentence level, where each sentence is processed independently and processors can benefit
        a lot from the ability to combine sentences from multiple documents for faster batched processing. This is a transparent
        implementation that allows these processors to batch process a list of Documents as if they were from a single Document.
        r+   c                 S   s   g | ]
}|j D ]}|qqS r   )	sentences)rG   rB   sentr   r   r   rH      s    z,UDProcessor.bulk_process.<locals>.<listcomp>c                 s       | ]}|j V  qd S r	   )
num_tokensrF   r   r   r   rX          z+UDProcessor.bulk_process.<locals>.<genexpr>c                 s   r   r	   )	num_wordsrF   r   r   r   rX      r   )	r3   r+   rJ   r   r   sumr   r   r5   )r   rL   combined_sentscombined_docr   r   r   rJ      s   

zUDProcessor.bulk_process)r   r%   r&   r'   r   r   rl   rm   r   r(   rx   ry   rt   staticmethodro   rJ   __classcell__r   r   rn   r   rh      s     





rh   c                   @   s(   e Zd ZdZdd Zdd Zdd ZdS )	ProcessorRegisterExceptionzB Exception indicating processor or processor registration failure c                 C   s   || _ || _|   d S r	   )_processor_class_expected_parentr   )r   processor_classexpected_parentr   r   r   r   	  s   z#ProcessorRegisterException.__init__c                 C   s   d| j  d| j d| _d S )NzFailed to register 'z'. It must be a subclass of 'z'.)r   r   r!   r   r   r   r   r     s   z(ProcessorRegisterException.build_messagec                 C   r   r	   r"   r   r   r   r   r#     r$   z"ProcessorRegisterException.__str__N)r   r%   r&   r'   r   r   r#   r   r   r   r   r     s
    r   c                    s    fdd}|S )Nc                    s*   t | ts
t| t| t < t  | S r	   )
issubclassr)   r   r   r   appendClsr>   r   r   wrapper  s
   


z#register_processor.<locals>.wrapperr   )r>   r   r   r   r   register_processor  s   r   c                    s    fdd}|S )Nc                    s$   t | ts
t| t| t  < | S r	   )r   rg   r   r   r   r>   rV   r   r   r     s   

z+register_processor_variant.<locals>.wrapperr   )r>   rV   r   r   r   r   register_processor_variant  s   r   N)r'   abcr   r   stanza.models.common.docr   stanza.pipeline.registryr   r   r   	Exceptionr   r)   rg   rh   r   r   r   r   r   r   r   <module>   s    *lP
