o
    h;                     @   s   d dl mZ d dlmZ d dlZd dlZd dlmZ d dlm	Z	m
Z
 G dd deZedd	d
gZeejdZedZdd Zdd Zdd ZdS )    )
namedtuple)EnumN)VOCAB_PREFIX)	XPOSVocab	WordVocabc                   @   s   e Zd ZdZdZdS )XPOSType      N)__name__
__module____qualname__XPOSWORD r   r   ]/var/www/html/env_mimamsha/lib/python3.10/site-packages/stanza/models/pos/xpos_vocab_utils.pyr   	   s    r   XPOSDescription	xpos_typesepstanzac                 C   s>   g }| D ]}d}|D ]
}|| d u rd}q
|r| | q|S )NTF)append)dataidxdata_filteredsentenceflagtokenr   r   r   filter_data   s   r   c                 C   s   t dt|   t| dd} t dt|   t| |ddgd}t}t|tt }|dkrVdD ]"}t| |d|d	}td
d |j	
 D }||k rUttj|}|}q3|S )NzOriginal length = r	   )r   zFiltered length = _r   ignore   ) -+|,:r   r   c                 s   s     | ]}t |t t V  qd S )N)lenr   ).0xr   r   r   	<genexpr>&   s    z*choose_simplest_factory.<locals>.<genexpr>)loggerinfor(   r   r   DEFAULT_KEYr   r   sum_id2unitvaluesr   r   r   )r   	shorthandvocabkey	best_sizer   lengthr   r   r   choose_simplest_factory   s   r7   c                 C   s0   | j tju rt||ddgdS t||d| jdS )Nr	   r   r   r'   )r   r   r   r   r   r   )descriptionr   r2   r   r   r   build_xpos_vocab,   s   r9   )collectionsr   enumr   loggingosstanza.models.common.vocabr   stanza.models.pos.vocabr   r   r   r   r   r.   	getLoggerr,   r   r7   r9   r   r   r   r   <module>   s    

