o
    h                     @   s   d Z ddlZddlZddlmZ ddlmZ ddlmZ ddl	m
Z
 ddlmZmZmZmZmZ ddlmZmZ ed	Zd
d Zdd Zdd Zdd Zdd Zdd Zedkr^e  dS dS )a  
Trains or scores a constituency model.

Currently a suuuuper preliminary script.

Example of how to run on multiple parsers at the same time on the Stanford workqueue:

for i in `echo 1000 1001 1002 1003 1004`; do nlprun -d a6000 "python3 stanza/utils/training/run_constituency.py vi_vlsp23 --use_bert --stage1_bert_finetun --save_name vi_vlsp23_$i.pt --seed $i --epochs 200 --force" -o vi_vlsp23_$i.out; done

    N)constituency_parser)RETAG_METHOD)prepare_con_dataset)common)Modeadd_charlm_argsbuild_charlm_argschoose_charlmfind_wordvec_pretrain)default_charlmsdefault_pretrainsstanzac                 C   s2   t |  | jddddd | jdddd	d
d d S )Nz
--use_bertF
store_truez-Use the default transformer for this language)defaultactionhelpz--parse_textmodestore_const
parse_textzParse a text file)destr   constr   )r   add_argument)parser r   a/var/www/html/env_mimamsha/lib/python3.10/site-packages/stanza/utils/training/run_constituency.pyadd_constituency_args   s   r   c                 C   s&   d|vrt | t}d|g}|S g }|S )Nz--wordvec_pretrain_file)r
   r   )short_languagedataset
extra_argswordvec_pretrainwordvec_argsr   r   r   build_wordvec_args    s   
r!   c                 C   sn   |t v rdt | g}ng }t|||}t|||jti }t||dd}tj|||ddd}	|| | |	 }
|
S )Nz--retag_methodF)	base_argsT)warnlayers)r   r!   r	   charlmr   r   r   choose_transformer)pathsr   r   command_argsr   
retag_argsr    r%   charlm_args	bert_argsdefault_argsr   r   r   build_default_args*   s   r-   c           
      C   s   | dd\}}t| ||||}d|ddg}|| }|jd ur'|d|jg |jd ur4|d|jg t|}t|}	|	S )N_   --shorthand--modetrainz--save_namez
--save_dir)splitr-   	save_nameextendsave_dirr   
parse_argsbuild_model_filename)
r'   
short_namer(   r   r   r   r,   
train_argsargsr4   r   r   r   r8   :   s   



r8   c                 C   s  |d }| d\}}	tj|| d}
tj|| d}tj|| d}tj|
r;tj|r;tj|s_td| d zt| W n   t	d|
 d	| d	| d
  t
|||	||}| tjkrd|
d|d|ddg}|| | }td| t| | tjks| tjkrd|d|ddg}|| | }td| t| | tjks| tjkrd|d|ddg}|| | }td| t| | dkrd|ddg}|| | }td| t| d S d S )NCONSTITUENCY_DATA_DIRr.   z
_train.mrgz_dev.mrgz	_test.mrgzThe data for z4 is missing or incomplete.  Attempting to rebuild...z?Unable to build the data.  Please correctly build the files in z, z and then try again.z--train_filez--eval_filer0   r1   r2   z Running train step with args: {}predictzRunning dev step with args: {}zRunning test step with args: {}r   zProcessing text with args: {})r3   ospathjoinexistsloggerwarningr   mainerrorr-   r   TRAINinfoformatr   	SCORE_DEV
SCORE_TEST)r   r'   treebankr9   temp_output_filer(   r   constituency_dirr   r   
train_filedev_file	test_filer,   r:   dev_args	test_args	text_argsr   r   r   run_treebankK   s\   $



rT   c                   C   s   t jtddtt td d S )Nconstituency)sub_argparser8   )r   rD   rT   r   r   build_argparser8   r   r   r   r   rD   }   s   rD   __main__)__doc__loggingr>   stanza.modelsr   $stanza.models.constituency.retaggingr   "stanza.utils.datasets.constituencyr   stanza.utils.trainingr   stanza.utils.training.commonr   r   r   r	   r
   !stanza.resources.default_packagesr   r   	getLoggerrB   r   r!   r-   r8   rT   rD   __name__r   r   r   r   <module>   s&    

2
