o
    hI                     @   sN   d dl mZ d dlZd dlmZ d dlmZmZ edZG dd deZ	dS )    )CounterN)	BaseVocab)UNKPADz\sc                       s4   e Zd Z fddZdd Zdd Zdd Z  ZS )	Vocabc                    s.   t  j|i | t fdddD  _d S )Nc                    s   g | ]} j |qS  )lang
startswith).0xselfr   [/var/www/html/env_mimamsha/lib/python3.10/site-packages/stanza/models/tokenization/vocab.py
<listcomp>   s    z"Vocab.__init__.<locals>.<listcomp>)zhjako)super__init__anylang_replaces_spaces)r   argskwargs	__class__r   r   r   
   s   zVocab.__init__c                    s   | j }t  |D ]}|D ]}| |d } |  d7  < qqttgttt   fdddd | _dd t	| jD | _
d S )	Nr      c                    s    |  S Nr   )kcounterr   r   <lambda>   s    z#Vocab.build_vocab.<locals>.<lambda>T)keyreversec                 S   s   i | ]\}}||qS r   r   )r
   iwr   r   r   
<dictcomp>   s    z%Vocab.build_vocab.<locals>.<dictcomp>)datar   normalize_unitr   r   listsortedkeys_id2unit	enumerate_unit2id)r   parasparaunit
normalizedr   r   r   build_vocab   s   ,zVocab.build_vocabc                 C   s   |S r   r   )r   r0   r   r   r   r'      s   zVocab.normalize_unitc                 C   s&   t d| }| jr|dd}|S )N  )SPACE_REsublstripr   replace)r   tokenr   r   r   normalize_token   s   zVocab.normalize_token)__name__
__module____qualname__r   r2   r'   r:   __classcell__r   r   r   r   r   	   s
    r   )
collectionsr   restanza.models.common.vocabr   r   r   compiler5   r   r   r   r   r   <module>   s    
