o
    h                     @   s   d Z ddlZddlZddlmZ ddlmZ ddlmZ ddl	m
Z
 ddlmZ ddlmZ ed	Zd
d Zdd Zdd ZedkrHe  dS dS )ah  
This script allows for training or testing on dev / test of the UD mwt tools.

If run with a single treebank name, it will train or test that treebank.
If run with ud_all or all_ud, it will iterate over all UD treebanks it can find.

Mode can be set to train&dev with --train, to dev set only
with --score_dev, and to test set only with --score_test.

Treebanks are specified as a list.  all_ud or ud_all means to look for
all UD treebanks.

Extra arguments are passed to mwt.  In case the run script
itself is shadowing arguments, you can specify --extra_args as a
parameter to mark where the mwt arguments start.
    N)mwt_expander)Document)CoNLL)common)Mode)max_mwt_lengthstanzac                 C   s    t | }|d}t|dkS )zF
    Checks whether or not there are MWTs in the given conll file
    Fr   )r   	conll2docget_mwt_expansionslen)filenamedocdata r   X/var/www/html/env_mimamsha/lib/python3.10/site-packages/stanza/utils/training/run_mwt.py	check_mwt    s   

r   c                 C   s  | dd }|d }| d| d}	| d| d}
| d| d}|r'|n| d| d}| d| d	}| d| d
}|rC|n| d| d}| d| d}| d| d}| d| d}d }d|v rr||dd  }d }d|v r||dd  }t|	std|  d S t|
s| tjkrtd|  |d | tjkrt	t
||gd d }td|  d|	d|r|n|
d|d|r|n|d|d|dddt|g}|| }td| t| | tjks| tjkr0d|r|n|
d|d|r|n|d|d|ddg}|| }td | t| t|r#|n||}td!|| | tjkrud|r<|n|d|d|rE|n|d|d|ddg}|| }td"| t| t|rf|n||}td#|| d S d S )$N_r   MWT_DATA_DIR/z.train.in.conlluz.dev.in.conlluz.dev.gold.conlluz.dev.pred.conlluz.test.in.conlluz.test.gold.conlluz.test.pred.conlluz-ud-train-mwt.jsonz-ud-dev-mwt.jsonz-ud-test-mwt.jsonz--eval_file   z--gold_filez(No training MWTS found for %s.  SkippingzGNo dev MWTS found for %s.  Training only the deterministic MWT expanderz--dict_onlyg?zMax len: %fz--train_filez--output_filez--langz--shorthandz--modetrainz--max_dec_lenz Running train step with args: {}predictzRunning dev step with args: {}z!Finished running dev set on
{}
{}zRunning test step with args: {}z"Finished running test set on
{}
{})splitindexr   loggerinfor   TRAINappendmathceilr   strformatr   main	SCORE_DEVr   run_eval_script_mwt
SCORE_TEST)modepathstreebank
short_nametemp_output_filecommand_args
extra_argsshort_languagemwt_dir
train_filedev_in_filedev_gold_filedev_output_filetest_in_filetest_gold_filetest_output_file
train_jsondev_json	test_json	eval_file	gold_filemax_mwt_len
train_argsdev_argsresults	test_argsr   r   r   run_treebank(   s   




r@   c                   C   s   t jtddt d d S )Nmwtr   )sub_argparse)r   r"   r@   r   build_argparser   r   r   r   r"   u   s   r"   __main__)__doc__loggingr   stanza.modelsr   stanza.models.common.docr   stanza.utils.conllr   stanza.utils.trainingr   stanza.utils.training.commonr   stanza.utils.max_mwt_lengthr   	getLoggerr   r   r@   r"   __name__r   r   r   r   <module>   s     
M
