o
    h/"                     @   s   d Z ddlZddlZddlmZ ddlmZ ddlmZ ddlm	Z	 ddl
mZmZmZmZ ddlmZ dd	lmZ ddlm  m  mZ ed
Zdd Zdd Zdd Zdd Zedkrde  dS dS )a{  
This script allows for training or testing on dev / test of the UD lemmatizer.

If run with a single treebank name, it will train or test that treebank.
If run with ud_all or all_ud, it will iterate over all UD treebanks it can find.

Mode can be set to train&dev with --train, to dev set only
with --score_dev, and to test set only with --score_test.

Treebanks are specified as a list.  all_ud or ud_all means to look for
all UD treebanks.

Extra arguments are passed to the lemmatizer.  In case the run script
itself is shadowing arguments, you can specify --extra_args as a
parameter to mark where the lemmatizer arguments start.
    N)identity_lemmatizer)
lemmatizer)attach_lemma_classifier)common)Modeadd_charlm_argsbuild_lemma_charlm_argschoose_lemma_charlm)run_lemma_classifier)check_lemmasstanzac                 C   s2   t |  | jdddd dd | jddddd d S )	Nz--lemma_classifierlemma_classifier
store_truezDon't use the lemma classifier datasets.  Default is to build lemma classifier as part of the original lemmatizer if the charlm is used)destactiondefaulthelpz--no_lemma_classifierstore_false)r   r   r   )r   add_argument)parser r   Z/var/www/html/env_mimamsha/lib/python3.10/site-packages/stanza/utils/training/run_lemma.pyadd_lemma_args"   s   

r   c                 C   s   | dd\}}| d }| d| d}tj|s%td||f  dS t|}|s-dS t|||j}	d|d	|d
dg}
|
|	 | }
t	
|
}t	|}|S )z
    Figure out what the model savename will be, taking into account the model settings.

    Useful for figuring out if the model already exists

    None will represent that there is no expected save_name
    _   LEMMA_DATA_DIR/.train.in.conlluzTreebank %s is not prepared for training the lemmatizer.  Could not find any training data at %s  Cannot figure out the expected save_name without looking at the data, but a later step in the process will skip the training anywayN--train_file--shorthand--modetrain)splitospathexistsloggerdebugr   r   charlmr   
parse_argsbuild_model_filename)paths
short_namecommand_args
extra_argsshort_languagedataset	lemma_dir
train_file
has_lemmascharlm_args
train_argsargs	save_namer   r   r   r*   *   s$   

r*   c                 C   sF  | dd\}}|d }	|	 d| d}
|	 d| d}|	 d| d}|r(|n|	 d| d}|	 d| d	}|	 d| d
}|rD|n|	 d| d}t|||j}tj|
sdtd||
f  d S t|
}|st	d| d | d  | t
jks| t
jkrd|
d|d|d|d|g
}t	d|| t| d S | t
jkrd|
d|d|d|d|g
}t	d|| t| d S d S | t
jkr|dv rd}nd}d|
d|d|d|d|d|ddg}|| | }t	d|| t| | t
jks| t
jkrd|d|d|d|ddg
}|| | }t	d|| t| | t
jkrCd|d|d|d|ddg
}|| | }t	d|| t| |j}|d u rP|jd u}|oW|tjv }|r| t
jkr|jd u rjd gnd!|jg}|g| }|jr}|d" t| t||||}d#|d$|d%d&| g}t| t| d S d S d S )'Nr   r   r   r   r   z.dev.in.conlluz.dev.gold.conlluz.dev.pred.conlluz.test.in.conlluz.test.gold.conlluz.test.pred.conlluzmTreebank %s is not prepared for training the lemmatizer.  Could not find any training data at %s  Skipping...z	Treebank z (z+) has no lemmas.  Using identity lemmatizerr   z--eval_filez--output_filez--gold_filer   z/Running identity lemmatizer for {} with args {})cs_pdtru_syntagrusde_hdt3060z--num_epochr    r!   z,Running train lemmatizer for {} with args {}predictz*Running dev lemmatizer for {} with args {}z+Running test lemmatizer for {} with args {}z--no_charlmz--charlmz--forcez--inputz--outputz--classifierz4saved_models/lemma_classifier/%s_lemma_classifier.pt)r"   r   r(   r#   r$   r%   r&   errorr   infor   TRAIN	SCORE_DEVformatr   main
SCORE_TESTr   r   prepare_lemma_classifierDATASET_MAPPINGforceappendr
   r*   r   )moder+   treebankr,   temp_output_filer-   r.   r/   r0   r1   r2   dev_in_filedev_gold_filedev_pred_filetest_in_filetest_gold_filetest_pred_filer4   r3   r5   
num_epochsdev_args	test_argsuse_lemma_classifierlc_charlm_argslemma_classifier_argsr7   attach_argsr   r   r   run_treebankK   s   

	








rY   c                	   C   s    t jtddtt ttd d S )Nlemmar   )sub_argparser*   choose_charlm_method)r   rC   rY   r   r   build_argparser*   r	   r   r   r   r   rC      s    rC   __main__)__doc__loggingr#   stanza.modelsr   r   stanza.models.lemmar   stanza.utils.trainingr   stanza.utils.training.commonr   r   r   r	   r
   ,stanza.utils.datasets.prepare_lemma_treebankr   .stanza.utils.datasets.prepare_lemma_classifierutilsdatasetsrE   	getLoggerr&   r   r*   rY   rC   __name__r   r   r   r   <module>   s&    
!c
