o
    h                     @   s   d dl Z d dlZd dlmZ d dlmZmZmZ d dlm	Z	 d dl
mZmZmZmZmZ e dZdd Zd	d
 Zdd Zdd Zdd ZedkrNe  dS dS )    N)tagger)no_pretrain_languagespos_pretrainsdefault_pretrains)common)Modeadd_charlm_argsbuild_pos_charlm_argschoose_pos_charlmfind_wordvec_pretrainstanzac                 C   s   t |  | jddddd d S )Nz
--use_bertF
store_truez-Use the default transformer for this language)defaultactionhelp)r   add_argument)parser r   X/var/www/html/env_mimamsha/lib/python3.10/site-packages/stanza/utils/training/run_pos.pyadd_pos_args   s   r   c                 C   sf   d|v sd|v r
g S | t v rtd|  dgS | tv r&|t|  v r&t}ni }t| t||}d|gS )Nz--wordvec_pretrain_filez--no_pretrainziNo known word vectors for language {}  If those vectors can be found, please update the training scripts.)r   loggerwarningformatr   r   r   )short_languagedataset
extra_argsdataset_pretrainswordvec_pretrainr   r   r   wordvec_args   s   r   c                 C   s   | dd\}}t|||j}tj|||dd}d|ddg}|t||| | | | }|jd ur9|d|jg |jd urF|d	|jg t	
|}	t	|	}
|
S )
N_   F)warn--shorthand--modetrainz--save_namez
--save_dir)splitr	   charlmr   choose_transformerr   	save_nameextendsave_dirr   
parse_argsmodel_file_name)paths
short_namecommand_argsr   r   r   charlm_args	bert_args
train_argsargsr(   r   r   r   build_model_filename%   s   



r4   c                 C   sB  | dd\}}|d }	|	 d| d}
|dkr |
d|	 d7 }
|	 d| d	}|r,|n|	 d| d
}|	 d| d}|r@|n|	 d| d}t|||j}t|||}d }d|v re||dd  }| tjkrg }|
 dD ]F}tj	
|d d }tj	|rtj	|rtd|||f   d S tj	|r|| qqtj	|r|| qqtd|   d S d|}
d|d d|
d|d|d|ddg}|d u r|d|g7 }|t||| | | }|| }td|| t| | tjks| tjkrLd|d d|d|d|ddg
}|d u r|d|g7 }|t||| | | }|| }td|| t| t|r?|n||}td|| | tjkrd|d d|d|d|ddg
}|d u rk|d|g7 }|t||| | | }|| }td|| t| t|r|n||}td|| d S d S ) Nr   r    POS_DATA_DIR/z.train.in.conllu	vi_vlsp22;z/vi_vtb.train.in.conlluz.dev.in.conlluz.dev.pred.conlluz.test.in.conlluz.test.pred.conlluz--eval_filer   z.zipzJPOS TRAIN FILE %s and %s both exist... this is very confusing, skipping %sz%TRAIN FILE NOT FOUND: %s ... skippingz--wordvec_dirWORDVEC_DIRz--train_filez--output_filez--langr"   r#   r$   z%Running train POS for {} with args {}predictz#Running dev POS for {} with args {}z!Finished running dev set on
{}
{}z$Running test POS for {} with args {}z"Finished running test set on
{}
{})r%   r	   r&   r   r'   indexr   TRAINospathsplitextexistsr   errorappendjoinr   infor   r   main	SCORE_DEVrun_eval_script_pos
SCORE_TEST)moder-   treebankr.   temp_output_filer/   r   r   r   pos_dir
train_filedev_in_filedev_pred_filetest_in_filetest_pred_filer0   r1   	eval_filetrain_piecestrain_piece	zip_piecer2   dev_argsresults	test_argsr   r   r   run_treebank;   s   







rY   c                	   C   s    t jtddtt ttd d S )Nposr   )r4   choose_charlm_method)r   rE   rY   r   r   build_argparser4   r
   r   r   r   r   rE      s    rE   __main__)loggingr=   stanza.modelsr   !stanza.resources.default_packagesr   r   r   stanza.utils.trainingr   stanza.utils.training.commonr   r   r	   r
   r   	getLoggerr   r   r   r4   rY   rE   __name__r   r   r   r   <module>   s   
S
