o
    –hœ  ã                   @   s<   d Z ddlZd	dd„Zedkreejd ejd ƒ dS dS )
zu
Converts the WikiNER data format to a format usable by our processing tools

python preprocess_wikiner input output
é    Núutf-8c                 C   sR  t | |d™}t |dddy}|D ]n}| ¡ }|s$| d¡ | d¡ q| ¡ }|D ]O}| d¡}|d }	|d }
|	 d	¡}|
 d
¡rkt|ƒdkrk| d |d |
¡¡ |dd … D ]}| d ||
dd … ¡¡ qZq*|D ]}| d ||
¡¡ qmq*| d¡ qW d   ƒ n1 sŠw   Y  W d   ƒ d S W d   ƒ d S 1 s¢w   Y  d S )N)ÚencodingÚwr   z-DOCSTART- O
Ú
ú|r   éÿÿÿÿÚ_zB-é   z{} {}
z{} I-{}
é   )ÚopenÚstripÚwriteÚsplitÚ
startswithÚlenÚformat)Ú
input_fileÚoutput_filer   ÚfinÚfoutÚlineÚwordsÚwordÚpiecesÚtextÚtagÚsubtextÚchunk© r   úg/var/www/html/env_mimamsha/lib/python3.10/site-packages/stanza/utils/datasets/ner/preprocess_wikiner.pyÚpreprocess_wikiner	   s8   



ÿÿêÿÿ"ÿr    Ú__main__r	   r
   )r   )Ú__doc__Úsysr    Ú__name__Úargvr   r   r   r   Ú<module>   s    
ÿ