o
    hD                  
   @   s  d dl mZ d dl mZ d dlZd dlZd dlZd dlZd dlZed\
Z	Z
ZZZZZZZZh dZh dZh dZG dd	 d	eZd
d Zdd Zh dZh dZdd Zdd Zdd Zd!ddZdd Z dd Z!dd Z"e#dkrve"  G dd  d ej$Z%dS )"    )division)print_functionN
   >   acldepobjoblamodconjexplflatiobjlistnmodrootadvclapposccompcsubjfixednsubjxcompadvmodnummodorphancompoundgoeswithvocative	discourse	parataxis
dislocated
reparandum>   ccauxclfcopdetcasemark>   AbbrCaseMoodPossTenseVoiceAspectDegreeGenderNumberPersonPoliteReflexAnimacyEvidentForeignNumTypeDefinitePolarityPronTypeVerbFormc                   @   s   e Zd ZdS )UDErrorN)__name__
__module____qualname__ rB   rB   W/var/www/html/env_mimamsha/lib/python3.10/site-packages/stanza/utils/conll18_ud_eval.pyr>      s    r>   c                 C   &   t jd dkst| ts| S | dS Nr      utf-8)sysversion_info
isinstancestrdecodetextrB   rB   rC   _decode      &rO   c                 C   rD   rE   )rH   rI   rJ   unicodeencoderM   rB   rB   rC   _encode   rP   rS   >   r   r
   r   r   >   passrelclxsubjc                 C   sP   g }| dkr&| dkr&|  dD ]}| dd\}}| d}|||f q|S )N _|:   >)splitappend)depsedepsedephdpathstepsrB   rB   rC   process_enhanced_deps   s   
re   c           "   
      s  G dd d}G dd d}G dd d}| |_ d\}d 	 |  } d
7  |s.nt|d}d u rN|dr?q!j||d  tj|sl fdd}jd  D ]}	|	 |	j	t
 }
g }|	j	t
 D ]1\}}d|v r|ddrtd t|qvt|}|rj| d
  n|}|||f qv|}
|ddrg }|
D ]%\}}t|d
kr||	j|	j	t gf q||f|v rq|||f q|}
|ddr|
D ]\}}t|d
kr|d dr||fg}
q|ddr6g }|
D ]-\}}d}|
D ]\}}||kr&||	j	t kr&||kr&d
}q|s2|||f q|}
|ddrng }|
D ]*\}}d
}|r`|j	t dkr`|D ]}|dr^d}qT|rj|||f qA|}
|ddrg }|
D ];\}}|d dkr||	j|	j	t gf qy|r|j	t drt|j	t | krqy|||f qy|}
|ddrg }|
D ]7\}}g }|D ]&}|d}|d tv rt|dkr|d
 tvr|d }|| q|||f q|}
|d
7 }|
|	j	t
< qcjd  D ]}	|	jr|	jr|	jj|	 qtjkr.td   td!d" jd  D dkrEtd#  |d$dsctd%d" jd  D d
krctd&  |jd' _d q!|d(}t|d)krtd* t|d|t v r|ddrtd+ t|q!d,td-d. |t |t< |t std/  j|t  j|||t|t    |t|t 7 }d0|t v r[zt t|t d0\}}W n   td1t|t  || d
 }d}||k rZt|  d} d
7  |d(}t|d)kr)td* t|d|t v rC|ddrAtd+ t|qj|jd' |d	d2 |d
7 }||k snyzt|t }W n   td3t|t  |tj d
 krtd4t|t t|t tj d
  zt|t } W n t!y }! ztd5t|t  |!d }!~!ww | dk rtd6  j|jd' |dd2 q"d urtd7S )8Nc                   @      e Zd Zdd ZdS )z%load_conllu.<locals>.UDRepresentationc                 S   s"   g | _ g | _g | _g | _d| _d S )NrW   )
characterstokenswords	sentencesrc   selfrB   rB   rC   __init__   s
   
z.load_conllu.<locals>.UDRepresentation.__init__Nr?   r@   rA   rm   rB   rB   rB   rC   UDRepresentation       ro   c                   @   rf   )zload_conllu.<locals>.UDSpanc                 S   s   || _ || _|| _d S N)startendline)rl   rr   rs   rt   rB   rB   rC   rm      s   
z$load_conllu.<locals>.UDSpan.__init__Nrn   rB   rB   rB   rC   UDSpan   rp   ru   c                   @   rf   )zload_conllu.<locals>.UDWordc                 S   s   || _ || _|| _d | _g | _dtdd |t dD | jt< |t	 dd | jt	< | jt	 t
v | _| jt	 tv | _t|t | jt< d S )NrY   c                 s   s(    | ]}| d dd tv r|V  qdS )=r[   r   N)r]   UNIVERSAL_FEATURES).0featrB   rB   rC   	<genexpr>   s    z7load_conllu.<locals>.UDWord.__init__.<locals>.<genexpr>rZ   r   )spancolumnsis_multiwordparentfunctional_childrenjoinsortedFEATSr]   DEPRELCONTENT_DEPRELSis_content_deprelFUNCTIONAL_DEPRELSis_functional_deprelre   DEPS)rl   r{   r|   r}   rB   rB   rC   rm      s   (z$load_conllu.<locals>.UDWord.__init__Nrn   rB   rB   rB   rC   UDWord   rp   r   )r   Nr   Tr[   z
#c                    s   | j dkrtd  | j d u rHt| jt }|dk s$|tj kr1tdt| jt  |rJj| d  }d| _ | || _ d S d S d S )N	remappingz5There is a cycle in the sentence that ends at line %dr   z=HEAD '{}' points outside of the sentence that ends at line {}r[   )	r~   r>   intr|   HEADlenri   formatrS   )wordheadr~   line_idxprocess_wordsentence_startudrB   rC   r      s   


z!load_conllu.<locals>.process_word.no_empty_nodesFzRThe collapsed CoNLL-U file still contains references to empty nodes at line {}: {}
no_gapping!no_shared_parents_in_coordinationr
   $no_shared_dependents_in_coordination
no_controlr   r   )no_external_arguments_of_relative_clausesrefr   no_case_inforZ      zKThere is a sentence with 0 tokens (possibly a double blank line) at line %dc                 S      g | ]	}|j d u r|qS rq   r~   rx   r   rB   rB   rC   
<listcomp>e      zload_conllu.<locals>.<listcomp>z2There are no roots in the sentence that ends at %dmultiple_roots_okayc                 S   r   rq   r   r   rB   rB   rC   r   h  r   z8There are multiple roots in the sentence that ends at %d	r   zKThe CoNLL-U line does not contain 10 tab-separated columns at line {}: '{}'zDThe collapsed CoNLL-U line still contains empty nodes at line {}: {}rW   c                 S   s   t | dkS )NZs)unicodedatacategory)crB   rB   rC   <lambda>  s    zload_conllu.<locals>.<lambda>z5There is an empty FORM in the CoNLL-U file at line %d-z0Cannot parse multi-word token ID '{}' at line {})r}   z$Cannot parse word ID '{}' at line {}z>Incorrect word ID '{}' for word '{}', expected '{}' at line {}z!Cannot parse HEAD '{}' at line {}z"HEAD cannot be negative at line %dz-The CoNLL-U file does not end with empty line)"rc   readlinerO   rstrip
startswithrj   r^   r   ri   r|   r   getr>   r   rS   r   r~   r   r   r]   CASE_DEPRELSUNIVERSAL_DEPREL_EXTENSIONSr   r   rs   IDr   filterFORMrg   extendrh   map
ValueError)"filerc   treebank_typero   ru   r   indexrt   positionr   enhanced_depsprocessed_depsr   rd   rb   r~   	duplicatehd2steps2includerelprocessed_stepsr   deppartsr|   rr   rs   words_expectedwords_found	word_lineword_columnsword_idhead_iderB   r   rC   load_conllu   sN  



$0
""

 


&
  
ar   c                    s~  G dd dG dd dG fddd fdd}d9fd	d
	}fdd}dd dd fdddd  fdd}j j krd}|tj k r|tj k rj | j | kr|d7 }|tj k r|tj k rj | j | ksfd}|tjk rj| jd |k r|d7 }|tjk rj| jd |k sd}|tjk r҈j| jd |k r|d7 }|tjk r҈j| jd |k sd}	d}
|dkrA|dkrdn|}|d tjkrdntj| }|| }dfddj|| D }dfddj|||  D }d j}	|	d!|j|d  j7 }	|	d"||7 }	|	d#||7 }	|dkr|dkrMdn|}|d tjkr[dntj| }|| }dfd$dj|| D }dfd%dj|||  D }d j}
|
d!|j|d  j7 }
|
d"||7 }
|
d#||7 }
td&|	 |
 d'dt	t
j ||d(  dt	t
j ||d(   |jj}|jj|jj||||d)d* ||d+d* ||d,d* ||d-d* ||d.d* ||d/d* ||d0d* ||d||d||d1d* d2d* d3||d4d* d5d* d3||d6d* d7d* d3d8S ):Nc                   @   s   e Zd ZdddZdS )zevaluate.<locals>.ScoreNc                 S   sv   || _ || _|| _|| _|r|| nd| _|r|| nd| _|| r*d| ||  nd| _|r6|| | _d S || _d S )Ng        r   )correct
gold_totalsystem_totalaligned_total	precisionrecallf1aligned_accuracy)rl   r   r   r   r   rB   rB   rC   rm     s   z evaluate.<locals>.Score.__init__rq   rn   rB   rB   rB   rC   Score  s    r   c                   @   rf   )zevaluate.<locals>.AlignmentWordc                 S   s   || _ || _d S rq   )	gold_wordsystem_wordrl   r   r   rB   rB   rC   rm     s   
z(evaluate.<locals>.AlignmentWord.__init__Nrn   rB   rB   rB   rC   AlignmentWord  rp   r   c                       s    e Zd Zdd Z fddZdS )zevaluate.<locals>.Alignmentc                 S   s   || _ || _g | _i | _d S rq   )
gold_wordssystem_wordsmatched_wordsmatched_words_map)rl   r   r   rB   rB   rC   rm     s   
z$evaluate.<locals>.Alignment.__init__c                    s    | j  || || j|< d S rq   )r   r^   r   r   r   rB   rC   append_aligned_words  s   z0evaluate.<locals>.Alignment.append_aligned_wordsN)r?   r@   rA   rm   r   rB   r   rB   rC   	Alignment  s    r   c                    s   d\}}}|t | k rO|t |k rO|| j| | jk r |d7 }n#| | j|| jk r/|d7 }n|| | j|| jk7 }|d7 }|d7 }|t | k rO|t |k s t | t ||S )N)r   r   r   r[   )r   rr   rs   )
gold_spanssystem_spansr   gisir   rB   rC   spans_score  s   



zevaluate.<locals>.spans_scorec           
         s   d ur)t fdd jD }t fdd jD }t fdd jD }nt j}t j}t j}|d u rB|||S dd } fdd}d	} jD ]}	d u s\|	jrl||	j|||	j|krl|d
7 }qQ||||S )Nc                 3       | ]	} |rd V  qdS r[   NrB   )rx   gold	filter_fnrB   rC   rz         z4evaluate.<locals>.alignment_score.<locals>.<genexpr>c                 3   r   r   rB   )rx   systemr   rB   rC   rz     r   c                 3   s    | ]
} |j rd V  qdS r   )r   r   r   rB   rC   rz     s    c                 S   s   | S rq   rB   r   rB   rB   rC   gold_aligned_gold  s   z<evaluate.<locals>.alignment_score.<locals>.gold_aligned_goldc                    s   | d ur j | dS d S )N
NotAligned)r   r   r   )	alignmentrB   rC   gold_aligned_system  s   z>evaluate.<locals>.alignment_score.<locals>.gold_aligned_systemr   r[   )sumr   r   r   r   r   r   )
r   key_fnr   r   r   alignedr   r   r   ri   r   )r   r   rC   alignment_score  s$   



z!evaluate.<locals>.alignment_scorec                    s   d}| j D ]}|t|jt 7 }qd}| jD ]}|t|jt 7 }qd}| jD ]N}|jjt }|jjt }	|D ]=\}
}dd |D }|	D ]/\}}dd |D }||ksY||krs|rs|
| j	|dkrg|d7 }qD|
dkrs|dkrs|d7 }qDq7q' |||S )Nr   c                 S      g | ]	}| d d qS rZ   r   r]   rx   drB   rB   rC   r     r   z>evaluate.<locals>.enhanced_alignment_score.<locals>.<listcomp>c                 S   r   r   r   r   rB   rB   rC   r     r   r   r[   )
r   r   r|   r   r   r   r   r   r   r   )r   EULASr   r   r   r   r   ri   	gold_depssystem_depsr~   r   	eulas_depsparentsdep
eulas_sdepr   rB   rC   enhanced_alignment_score  s.   



	z*evaluate.<locals>.enhanced_alignment_scorec                 S   s:   |t | krdS | | jr| | jj|kS | | jj|kS )NT)r   r}   r{   rr   rs   )ri   imultiword_span_endrB   rB   rC   
beyond_end  s
   
zevaluate.<locals>.beyond_endc                 S   s   | j r| jj|kr| jjS |S rq   )r}   r{   rs   )r   r  rB   rB   rC   
extend_end  s   zevaluate.<locals>.extend_endc                    s  | | j r!| | jj}|| j s || jj| | jjk r |d7 }n|| jj}| | j s<| | jj|| jjk r<|d7 }||}} | ||rM |||s|t| k rq|t|kse| | jj|| jjkrq| | |}|d7 }n|| |}|d7 } | ||rM |||rM||||fS )Nr[   )r}   r{   rs   rr   r   )r   r   r   r   r  gsss)r	  r
  rB   rC   find_multiword_span$  s,   
""



	z%evaluate.<locals>.find_multiword_spanc           	         s0   fddt || D }tt || D ]}tt   D ]t}| ||  jt  ||  jt  krZd|d || k rS|d   k rS||d  |d  nd || |< t|| | |d || k rp||d  | nd|| |< t|| | |d   k r|| |d  nd|| |< q q|S )Nc                    s   g | ]	}d g   qS )r   rB   )rx   r  r   r  rB   rC   r   @  r   z1evaluate.<locals>.compute_lcs.<locals>.<listcomp>r[   r   )rangereversedr|   r   lowermax)	r   r   r   r   r  r  lcsgsrB   r  rC   compute_lcs?  s   ,D:<zevaluate.<locals>.compute_lcsc           
         s   | |}d\}}|t | k r|t |k r| | js|| jr| |||\}}}}||kr||kr| |||||}d\}}	|	|| k r||| k r| ||	  jt  |||  jt  krx|| ||	  |||   |	d7 }	|d7 }n!||	 | |	d || k r||	d  | ndkr|	d7 }	n|d7 }|	|| k r||| k sKn@| | jj| | jjf|| jj|| jjfkr|| | ||  |d7 }|d7 }n| | jj|| jjkr|d7 }n|d7 }|t | k r|t |k s|S )N)r   r   r[   r   )	r   r}   r|   r   r  r   r{   rr   rs   )
r   r   r   r   r   r  r  r  r  r  )r   r  r  rB   rC   align_wordsI  s6   
,
0
0

zevaluate.<locals>.align_wordsr   r[   z:The error occurs right at the beginning of the two files.
rW   r    c                    $   g | ]}d   j|j|j qS rW   r   rg   rr   rs   rx   tgold_udrB   rC   r        $ zevaluate.<locals>.<listcomp>c                    r  r  r  r  r  rB   rC   r     r   zFile '{}':
z`  Token no. {} on line no. {} is the last one with all characters reproduced in the other file.
z#  The previous {} tokens are '{}'.
z  The next {} tokens are '{}'.
c                    r  r  r  r  	system_udrB   rC   r     r   c                    r  r  r  r  r!  rB   rC   r     r   zDThe concatenation of tokens in gold file and in system file differ!
zFFirst 20 differing characters in gold file: '{}' and system file: '{}'   c                 S   
   | j t S rq   )r|   UPOSwrX   rB   rB   rC   r        
 zevaluate.<locals>.<lambda>c                 S   r$  rq   )r|   XPOSr&  rB   rB   rC   r     r(  c                 S   r$  rq   )r|   r   r&  rB   rB   rC   r     r(  c                 S   s   | j t | j t | j t fS rq   )r|   r%  r)  r   r&  rB   rB   rC   r     s    c                 S   s    || j t dkr| j t S dS NrX   )r|   LEMMAr'  garB   rB   rC   r     s     c                 S   s
   || j S rq   r   r,  rB   rB   rC   r     r(  c                 S      || j | jt fS rq   r~   r|   r   r,  rB   rB   rC   r         c                 S   r.  rq   r/  r,  rB   rB   rC   r     r0  c                 S      | j S rq   r   r'  rB   rB   rC   r         r   c                    s6    | j | jt | jt | jt  fdd| jD fS )Nc                    s.   g | ]} ||j t |j t |j t fqS rB   )r|   r   r%  r   )rx   r   r-  rB   rC   r     s    &z.evaluate.<locals>.<lambda>.<locals>.<listcomp>)r~   r|   r   r%  r   r   r,  rB   r5  rC   r     s
     
c                 S   r1  rq   r2  r3  rB   rB   rC   r     r4  c                 S   s4   || j | jt || jt dkr| jt fS dfS r*  )r~   r|   r   r+  r,  rB   rB   rC   r     s
    c                 S   r1  rq   r2  r3  rB   rB   rC   r     r4  )Tokens	SentencesWordsr%  r)  UFeatsAllTagsLemmasUASLASELASr   CLASMLASBLEX)NN)rg   r   rh   rs   r   r   rc   rt   r>   r   rS   ri   rj   )r  r"  r   r   r  r  r   gtindexstindexgtokenreportstokenreportnprevnnextnfirst
prevtokens
nexttokensr   rB   )	r   r   r   r	  r  r
  r  r  r"  rC   evaluate  s   


%""""
$"&
&"&
	


rK  c                 C   s>   |d u ri }t | fdditjdkrddini }t|| |S )NmoderrF   r   encodingrG   )openrH   rI   r   )rc   r   _filerB   rB   rC   load_conllu_file  s   &rR  c                 C   s   i }t | j}d|v rdnd|d< d|v rdnd|d< d|v r!dnd|d< d	|v r+dnd|d
< d|v r5dnd|d< d|v r?dnd|d< | j|d< | j|d< t| j|}t| j|}t||S )N1r[   r   r   2r   3r   4r   5r   6r   r   r   )r   enhancementsr   r   rR  	gold_filesystem_filerK  )argsr   rY  r  r"  rB   rB   rC   evaluate_wrapper  s   



r]  c                 C   s  g }|sJ|sJ| dd| d j  | dd| d j  | dd| d j  |rI| dd| d	 j  | d
d| d j  nu|rR| d n| d | d g d}|rh|d	dg7 }|D ]T}|r| d|| | j| | j| | j| | jp|dkr| | jnd qj| d|d| | j d| | j d| | j | | j	d urdd| | j	 nd qjd
|S )NzLAS F1 Score: {:.2f}d   r=  zMLAS Score: {:.2f}r@  zBLEX Score: {:.2f}rA  zELAS F1 Score: {:.2f}r>  zEULAS F1 Score: {:.2f}r   z8Metric     | Correct   |      Gold | Predicted | Alignedz:Metric     | Precision |    Recall |  F1 Score | AligndAccz;-----------+-----------+-----------+-----------+-----------)r6  r7  r8  r%  r)  r9  r:  r;  r<  r=  r?  r@  rA  z {:11}|{:10} |{:10} |{:10} |{:10}r8  rW   z&{:11}|{:10.2f} |{:10.2f} |{:10.2f} |{}z{:10.2f}
)r^   r   r   r   r   r   r   r   r   r   r   )
evaluationverbosecountsenhancedrN   metricsmetricrB   rB   rC   build_evaluation_table  sB   

$
rf  c                  C   s   t  } | jdtdd | jdtdd | jdddd	d
d | jdddd	dd | jdddddd | jdtddd | jdddd | jddd	dd |  }t|}t||j|j|j	}t
| d S )NrZ  z,Name of the CoNLL-U file with the gold data.)typehelpr[  z1Name of the CoNLL-U file with the predicted data.z	--verbosez-vF
store_truezPrint all metrics.)defaultactionrh  z--countsz-czePrint raw counts of correct/gold/system/aligned words instead of precision/recall/F1 for all metrics.z--no-enhancedrc  store_falseTz-Turn off evaluation of enhanced dependencies.)destrk  rj  rh  z--enhancements0zLevel of enhancements in the gold data (see guidelines) 0=all (default), 1=no gapping, 2=no shared parents, 3=no shared dependents 4=no control, 5=no external arguments, 6=no lemma info, combinations: 12=both 1 and 2 apply, etc.)rg  rj  rh  z--no-empty-nodeszEmpty nodes have been collapsed (needed to correctly evaluate enhanced/gapping). Raise exception if an empty node is encountered.)rj  rh  z--multiple-roots-okayz6A single sentence can have multiple nodes with HEAD=0.)argparseArgumentParseradd_argumentrK   
parse_argsr]  rf  ra  rb  rc  print)parserr\  r`  resultsrB   rB   rC   main  s:   

rv  __main__c                   @   sH   e Zd Zedd Zdd Zdd Zdd Zd	d
 Zdd Z	dd Z
dS )TestAlignmentc                 C   s   g d}}| D ]Q}| d}t|dkr)|d7 }|d||d t|dk q|d|d |t| d |d  |dd D ]}|d7 }|d||t|dk qCqttjdkrbtj	ntj
d|dg S )	zKPrepare fake CoNLL-U files with fake HEAD to prevent multiple roots errors.r   r  r[   z{}	{}	_	_	_	_	{}	_	_	_z{}-{}	{}	_	_	_	_	_	_	_	_NrN  r_  )r]   r   r^   r   r   r   rH   rI   ioStringIOBytesIOr   )ri   lines	num_wordsr'  partspartrB   rB   rC   _load_words  s   

"(*zTestAlignment._load_wordsc                 C   s    |  tt| || | d S rq   )assertRaisesr>   rK  r  )rl   r   r   rB   rB   rC   _test_exception"  s    zTestAlignment._test_exceptionc                 C   s|   t | || |}tdd |D }tdd |D }| |d j|d j|d jf|| || d| ||  f d S )Nc                 s   (    | ]}t d t|dd  V  qdS r[   r  Nr  r   r]   r   rB   rB   rC   rz   '     & z)TestAlignment._test_ok.<locals>.<genexpr>c                 s   r  r  r  r   rB   rB   rC   rz   (  r  r8  r   )rK  r  r   assertEqualr   r   r   )rl   r   r   r   rd  r   r   rB   rB   rC   _test_ok%  s   zTestAlignment._test_okc                 C   s   |  dgdg d S )Nab)r  rk   rB   rB   rC   test_exception,  s   zTestAlignment.test_exceptionc                 C   s,   |  dgdgd |  g dg dd d S )Nr  r[   r  r  r   rF   r  rk   rB   rB   rC   
test_equal/  s   zTestAlignment.test_equalc                 C   sX   |  dgg dd |  g dg dd |  dgdd	gd |  dd
gg dd d S )Nz	abc a b cr  rF   r  zbc b cr   r  r  r   r      zabcd a b c dab a bcd c dzde d e)r  z	bcd b c dr      r  rk   rB   rB   rC   test_equal_with_multiword3  s   z'TestAlignment.test_equal_with_multiwordc                 C   s   |  dgg dd |  ddgg dd |  g dg dd |  g d	g d
d |  ddgg dd |  ddgg dd |  g d	ddgd d S )Nabcdr  r   abcr   r[   )r  bcr   r   r  )r  r  cdz
abc a BX cz
def d EX f)r  r  zef e fr  r  zcd bc dzab AX BXzcd CX ar  rk   rB   rB   rC   test_alignment9  s   zTestAlignment.test_alignmentN)r?   r@   rA   staticmethodr  r  r  r  r  r  r  rB   rB   rB   rC   rx    s    
rx  rq   )&
__future__r   r   ro  ry  rH   r   unittestr  r   r   r+  r%  r)  r   r   r   r   MISCr   r   rw   	Exceptionr>   rO   rS   r   r   re   r   rK  rR  r]  rf  rv  r?   TestCaserx  rB   rB   rB   rC   <module>   s:   f   
x(