"""
Processors related to spaCy in the pipeline.
"""

from stanza.models.common import doc
from stanza.pipeline._constants import TOKENIZE
from stanza.pipeline.processor import ProcessorVariant, register_processor_variant


def check_spacy():
    """
    Import necessary components from spaCy to perform tokenization.
    """
    try:
        import spacy
    except ImportError:
        raise ImportError(
            "spaCy is used but not installed on your machine. "
            "Go to https://spacy.io/usage for installation instructions."
        )
    return True


@register_processor_variant(TOKENIZE, "spacy")
class SpacyTokenizer(ProcessorVariant):
    def __init__(self, config):
        """ Construct a spaCy-based tokenizer by loading the spaCy pipeline.
        """
        if config["lang"] != "en":
            raise Exception("spaCy tokenizer is currently only allowed in English pipeline.")

        try:
            import spacy
            from spacy.lang.en import English
        except ImportError:
            raise ImportError(
                "spaCy 2.0+ is used but not installed on your machine. "
                "Go to https://spacy.io/usage for installation instructions."
            )

        # A bare English() pipeline tokenizes but does not sentence-split,
        # so attach a rule-based sentencizer; the add_pipe API changed in spaCy 3.
        self.nlp = English()
        if spacy.__version__.startswith("2."):
            self.nlp.add_pipe(self.nlp.create_pipe("sentencizer"))
        else:
            self.nlp.add_pipe("sentencizer")
        self.no_ssplit = config.get("no_ssplit", False)

    def process(self, document):
        """ Tokenize a document with the spaCy tokenizer and wrap the results into a Doc object.
        """
        if isinstance(document, doc.Document):
            text = document.text
        else:
            text = document
        if not isinstance(text, str):
            raise Exception("Must supply a string or Stanza Document object to the spaCy tokenizer.")
        spacy_doc = self.nlp(text)

        sentences = []
        for sent in spacy_doc.sents:
            tokens = []
            for tok in sent:
                # Store each token's text plus its character offsets in the MISC
                # field, e.g. "start_char=0|end_char=5".
                token_entry = {
                    doc.TEXT: tok.text,
                    doc.MISC: f"{doc.START_CHAR}={tok.idx}|{doc.END_CHAR}={tok.idx + len(tok.text)}",
                }
                tokens.append(token_entry)
            sentences.append(tokens)

        # If sentence splitting is disabled, flatten all tokens into one sentence.
        if self.no_ssplit:
            sentences = [[t for s in sentences for t in s]]

        return doc.Document(sentences, text)