o
    h?P                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlmZ d dl	m
Z
mZmZmZmZmZ d dlmZ d dlmZ d dlmZmZmZ d dlmZ d dlm  mZ d dlmZ e d	Z!G d
d deZ"G dd de j#Z$d6ddZ%dd Z&d7ddZ'd6ddZ(dd Z)dd Z*dd Z+dd Z,ddefddZ-efd d!Z.d"efd#d$Z/d%d& Z0d'd( Z1d)d* Z2d+d, Z3d8d.d/Z4d"efd0d1Z5d"efd2d3Z6d"efd4d5Z7dS )9    N)Enum)default_charlmslemma_charlmspos_charlmsdepparse_charlmsTRANSFORMERSTRANSFORMER_LAYERS)treebank_to_short_name)	ud_scores)downloadDEFAULT_MODEL_DIRUnknownLanguageError)common)conll18_ud_evalstanzac                   @   s   e Zd ZdZdZdZdZdS )Mode            N)__name__
__module____qualname__TRAIN	SCORE_DEV
SCORE_TESTSCORE_TRAIN r   r   W/var/www/html/env_mimamsha/lib/python3.10/site-packages/stanza/utils/training/common.pyr      s
    r   c                       s6   e Zd Z fddZd fdd	Z fddZ  ZS )	ArgumentParserWithExtraHelpc                    s   t  j|i | || _d S N)super__init__sub_argparse)selfr#   argskwargs	__class__r   r   r"      s   
z$ArgumentParserWithExtraHelp.__init__Nc                    s   t  j|d d S )N)file)r!   
print_help)r$   r)   r'   r   r   r*   !      z&ArgumentParserWithExtraHelp.print_helpc                    s   t   }| jd ur@| j d}d}t|D ]\}}| dr&|}q|dkr2| s2|} nq|d d||d   }|S )N
zusage:r   z

model arguments:)r!   format_helpr#   split	enumeratestrip
startswithjoin)r$   	help_textsub_text
first_lineline_idxliner'   r   r   r.   $   s   

z'ArgumentParserWithExtraHelp.format_helpr    )r   r   r   r"   r*   r.   __classcell__r   r   r'   r   r      s    r   c                 C   s   t | tjd}|jdddddd |jdtd	d
d |jddtjdtjdd |jdddtjdd |jdddtjdd |jdddtj	dd |jdtd dd |jdtd dd |jdddd d! |jd"ddd#d! |jd$d%ddd&d' |S )(N)r#   formatter_classz--save_outputtemp_outputTstore_falsez1Save output - default is to use a temp directory.)destdefaultactionhelp	treebanks+zEWhich treebanks to run on.  Use all_ud or ud_all for all UD treebanks)typenargsr@   z--trainmodestore_constzRun in train mode)r=   r>   r?   constr@   z--score_devzScore the dev setr=   r?   rG   r@   z--score_testzScore the test setz--score_trainzNScore the train set as a test set.  Currently only implemented for some models
--save_dirzGRoot dir for saving models.  If set, will override the model's default.)rC   r>   r@   --save_namezHBase name for saving models.  If set, will override the model's default.z--charlm_only
store_trueFz:When asking for ud_all, filter the ones which have charlms)r?   r>   r@   z--transformer_onlyzPWhen asking for ud_all, filter the ones for languages where we have transformersz--forceforcezRetrain existing models)r=   r?   r>   r@   )
r   argparseArgumentDefaultsHelpFormatteradd_argumentstrr   r   r   r   r   )r#   parserr   r   r   build_argparse3   s   rR   c                 C   s*   | j ddtdd | j dddd dd	 d S )
N--charlmr>   zWhich charlm to run on.  Will use the default charlm for this language/model if not set.  Set to None to turn off charlm for languages with a default charlm)r>   rC   r@   z--no_charlmcharlmrF   zCDon't use a charlm, even if one is used by default for this packagerH   )rO   rP   )rQ   r   r   r   add_charlm_argsI   s   rU   c                    s  |du rt ddtj  tjdd }n
t dd|  t }t|}	|dur1||	 dtjv rPtjd}
tj|
d d }|		tjd|
 }n|	j
|d\}}|jrc|d|jg t|rk||}|j}g }|jD ]S}|dr|dd	 }| d
v rt|d } dur|jrt d  fdd|D }|jrt d dd |D }t d|d| || qs|| qst|D ]\}}|dkrt d t|}t d||f  g }|dkr|jr|j}t|dkrtj|\}}d||f }tj||}t d||| d|g}n|du r3d||f }t d|| d|g}ng }|tj kr|j!s|durL|||||}n"|jrYtj|j|}ntjd|}|d|g tj||}|du rtntj"|rt d||f  qt d||f  |j#r|dkrt$% }| |||||j&|||  W d   n	1 sw   Y  q| ||||d|||  qdS )a  
    A main program for each of the run_xyz scripts

    It collects the arguments and runs the main method for each dataset provided.
    It also tries to look for an existing model and not overwrite it unless --force is provided

    model_name can be a callable expecting the args
      - the charlm, for example, needs this feature, since it makes
        both forward and backward models
    NzTraining program called with:
 r   z--extra_args)r%   rI   /r-   )ud_allall_udUDBASEzLFiltering ud_all treebanks to only those which can use charlm for this modelc                    s2   g | ]} g t |d ddR  dur|qS )_r   r>   N)r	   r/   .0xchoose_charlm_methodr   r   
<listcomp>   s    "zmain.<locals>.<listcomp>zSFiltering ud_all treebanks to only those which can use a transformer for this modelc                 S   s&   g | ]}t |d d tv r|qS )r[   r   )r	   r/   r   r\   r   r   r   ra      s   & zExpanding %s to %sr   z)=========================================z%s: %setez%s_%sz!Save file for %s model for %s: %srJ   z%s_%s.ptzSave file for %s model: %ssaved_modelsz%s: %s exists, skipping!z)%s: %s does not exist, training new model)'loggerinfor3   sysargvdefault_pathsget_default_pathsrR   index
parse_argsparse_known_argssave_dirextendcallablerE   rA   endswithlowerr   get_ud_treebankscharlm_onlytransformer_onlyappendr0   r	   debug	save_namelenospathr/   r   r   rL   existsr;   tempfileNamedTemporaryFilename)run_treebank	model_dir
model_nameadd_specific_argsr#   build_model_filenamer`   r%   pathsrQ   idx
extra_argscommand_argsrE   rA   treebankud_treebankstreebank_idx
short_namesave_name_argsrw   save_name_dirsave_name_filename
model_pathrm   temp_output_filer   r_   r   mainM   s   















r   c                    s   t | | |du rtj ddddS  fdd|D }tdtdd	 |D d
fdd	|D }d
fdd	|D }|d | S )z Wrapper for lemma scorer. NTF)verbosecountsenhancedc                    s   g | ]} | j qS r   )f1)r]   key)
evaluationr   r   ra      s    z#run_eval_script.<locals>.<listcomp>   c                 s   s    | ]}t |V  qd S r    )rx   r]   er   r   r   	<genexpr>   s    z"run_eval_script.<locals>.<genexpr>rV   c                 3   s    | ]
}d    |V  qdS )z{:>%d}Nformatr   max_lenr   r   r      s    c                 3   s"    | ]}d    d| V  qdS )z{:%d.2f}d   Nr   r\   r   r   r   r      s     r,   )r
   ud_evalbuild_evaluation_tablemaxr3   )gold_conllu_filesystem_conllu_fileevalsresultsevals_stringresults_stringr   )r   r   r   run_eval_script   s   
r   c                 C      t | |g ddS )N)Tokens	SentencesWordsr   r   	eval_gold	eval_predr   r   r   run_eval_script_tokens   r+   r   c                 C   s   t | |dgdS )Nr   r   r   r   r   r   r   run_eval_script_mwt   s   r   c                 C   r   )N)UPOSXPOSUFeatsAllTagsr   r   r   r   r   r   run_eval_script_pos   r+   r   c                 C   r   )N)UASLASCLASMLASBLEXr   r   r   r   r   r   run_eval_script_depparse   r+   r   c              
   C   s  | | d }|d ur|d ur| | i  ||}|d ur{d|| |}tj|sGtd| zt| d d|i|d W n	 tyF   Y nw tj|r{|d urp|d urp| |v rp|||  v rptd|  d| d| d |S td	| d |S d
|| }t		|}t
|dkrtd|  d zt| |d W n ty }	 z	td| d|	d }	~	ww t		|}t
|dkrtd| d|  dt
|dkrtd| d|d }
td|
 d |
S )Nz{}/{}/pretrain/{}.ptz5Default pretrain should be {}  Attempting to downloadpretrainlangpackage
processorsr   zUsing default pretrain for :z, found in z>  To use a different pretrain, specify --wordvec_pretrain_filez.Using default pretrain for language, found in z{}/{}/pretrain/*.ptr   z-Cannot figure out which pretrain to use for 'z;'.  Will download the default package and hope for the best)r   r   zCannot find any pretrains in z  No pretrains in the system for this language.  Please prepare an embedding as a .pt and use --wordvec_pretrain_file to specify a .pt file to usez  Try 'stanza.download("zY")' to get a default pretrain or use --wordvec_pretrain_file to specify a .pt file to user   z%Too many pretrains to choose from in z9  Must specify an exact path to a --wordvec_pretrain_filezUsing pretrain found in )getr   ry   rz   r{   rd   re   r   r   globrx   warningFileNotFoundError)languagedefault_pretrainsdataset_pretrainsdatasetr   
default_ptdefault_pt_pathpretrain_path	pretrainsr   ptr   r   r   find_wordvec_pretrain   sH   $

r   c                 C   s  d ||| }tj|rtd| d|  d |S d ||| |}tj|r7td| d|  d |S z$t|d|  d|i|d tj|rZtd	| d|  d |W S W n tyy } ztd
|  d| d| d| d	|d}~ww td
|  d| d| d| d	)z
    Return the path to the forward or backward charlm if it exists for the given package

    If we can figure out the package, but can't find it anywhere, we try to download it
    z&saved_models/charlm/{}_{}_{}_charlm.ptzUsing model z for z charlmz{}/{}/{}_charlm/{}.ptN_charlmr   zDownloaded model, using model zCannot find z charlm in either z or z  Attempted downloading z but that did not work)	r   ry   rz   r{   rd   re   r   
ValueErrorr   )	directionr   rT   r   
saved_pathresource_pathr   r   r   r   find_charlm_file  s&   $"r   Tc           
      C   s  |rzt d| ||d}t d| ||d}W nT tyh } zH|| d r]|t| d d }zt d| ||d}t d| ||d}W n tyT } z	td||f |d}~ww td|| n W Y d}~nd}~ww d	|d
|g}	|ss|	S dd|  d| g|	 S g S )z?
    If specified, return forward and backward charlm args
    forward)r   backwardr[   r   NzYTried to find charlm %s, which doesn't exist.  Also tried %s, but didn't find that eitherzFWas asked to find charlm %s, which does not exist.  Did find %s thoughz--charlm_forward_filez--charlm_backward_filerS   z--charlm_shorthand)r   r   r2   rx   rd   r   )
r   rT   	base_argsr   r   r   r   short_charlme2	char_argsr   r   r   build_charlm_args'  s<   r   c                 C   sX   | | d}| | i  |d}|du rdS |dkr|S || | i v r&|S |r*|S dS )s
    charlm == "default" means the default charlm for this dataset or language
    charlm == None is no charlm
    Nr>   )r   )r   r   rT   language_charlmsdataset_charlmsdefault_charlmspecific_charlmr   r   r   choose_charlmG  s   r   c                 C      t | ||ttS r   )r   r   r   short_languager   rT   r   r   r   choose_pos_charlm\     r   c                 C   r   r   )r   r   r   r   r   r   r   choose_depparse_charlmc  r   r   c                 C   r   r   )r   r   r   r   r   r   r   choose_lemma_charlmj  r   r   Fc                 C   sv   g }|dur9|j r9d|vr9| tv r0dt| g}|r.| tv r.d|vr.|dtt| g |S |r9td|   |S )zJ
    Choose a transformer using the default options for this language
    Nz--bert_modelz--bert_hidden_layerszXTransformer requested, but no default transformer for %s  Specify one using --bert_model)use_bertr   r   r   rn   rP   rd   error)r   r   r   warnlayers	bert_argsr   r   r   choose_transformerq  s   r   c                 C      t | ||}t| |||}|S r    )r   r   r   r   rT   r   r   charlm_argsr   r   r   build_pos_charlm_args     r   c                 C   r   r    )r   r   r   r   r   r   build_lemma_charlm_args  r   r   c                 C   r   r    )r   r   r   r   r   r   build_depparse_charlm_args  r   r   r    )NNNNN)TF)8rM   r   loggingry   pathlibrf   r|   enumr   !stanza.resources.default_packagesr   r   r   r   r   r   stanza.models.common.constantr	   stanza.models.common.utilsr
   stanza.resources.commonr   r   r   stanza.utils.datasetsr   stanza.utils.default_pathsutilsrh   stanza.utilsr   r   	getLoggerrd   r   ArgumentParserr   rR   rU   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   <module>   sJ     



v0 
