o
    hdU                     @   s   d Z ddlmZmZ ddlZddlmZ ddlmZ ddl	Z	ddl
Z
ddlZddlmZ dZdZd	Zd
Ze
dZdZG dd deZG dd deZdS )z
Tree datastructure
    )dequeCounterN)Enum)StringIO)StanzaObject) ( z[-=#*])z*E*z*T*z*O*c                   @   s$   e Zd ZdZdZdZdZdZdZdS )TreePrintMethodzz
    Describes a few options for printing trees.

    This probably doesn't need to be used directly.  See __format__
                   N)	__name__
__module____qualname____doc__ONE_LINELABELED_PARENSPRETTYVLSP
LATEX_TREEr
   r
   r
   `/var/www/html/env_mimamsha/lib/python3.10/site-packages/stanza/models/constituency/parse_tree.pyr   !   s    r   c                   @   s4  e Zd ZdZdBddZdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dCddZdd Zdd Zdd Zdd ZdDddZedd Zedd  Zed!d" Zed#d$ Zed%d& ZedEd(d)Zed*d+ ZedFd-d.Zefd/d0Zd1d2 Zd3d4 Zd5d6 Zd7d8 Zd9d: Z d;d< Z!d=d> Z"edGd@dAZ#dS )HTreez4
    A data structure to represent a parse tree
    Nc                 C   s8   |d u rt | _nt|tr|f| _nt|| _|| _d S N)EMPTY_CHILDRENchildren
isinstancer   tuplelabel)selfr!   r   r
   r
   r   __init__2   s   



zTree.__init__c                 C   s   t | jdkS )Nr   lenr   r"   r
   r
   r   is_leaf<   s   zTree.is_leafc                 C   s"   t | jdkot | jd jdkS )Nr   r   r$   r&   r
   r
   r   is_preterminal?   s   "zTree.is_preterminalc                 c   sx    |   r
| V  dS |  rtdt| j}t|d}|dur:|  r(|V  nt|j|}t|d}|dus dS dS )z?
        Yield the preterminals one at a time in order
        Nz6Attempted to iterate preterminals on non-internal node)r(   r'   
ValueErroriterr   next	itertoolschain)r"   iteratornoder
   r
   r   yield_preterminalsB   s   


zTree.yield_preterminalsc                 C   s&   |   r| jgS dd |  D }|S )z.
        Get the labels of the leaves
        c                 S   s   g | ]}|j d  jqS )r   )r   r!   .0xr
   r
   r   
<listcomp>]       z$Tree.leaf_labels.<locals>.<listcomp>)r'   r!   r0   )r"   wordsr
   r
   r   leaf_labelsV   s   zTree.leaf_labelsc                 C   s   t |  S r   )r%   r7   r&   r
   r
   r   __len__`   s   zTree.__len__c                 C   s,   |   rdS |  rdS tdd | jD S )zT
        Returns True if all leaves are under preterminals, False otherwise
        FTc                 s       | ]}|  V  qd S r   )all_leaves_are_preterminalsr2   tr
   r
   r   	<genexpr>m       z3Tree.all_leaves_are_preterminals.<locals>.<genexpr>)r'   r(   allr   r&   r
   r
   r   r:   c   s
   z Tree.all_leaves_are_preterminalsc              
   C   s:  |du rdd }d}t  }t }||  t|dkr| }|tu rT|tu rD|d8 }|t t|dkr<d}n| }|tu s*|d |du rNn|| n| r|d|  |dt||j	||j
d j	tf  t|dks|d	 tur|d nwtd
d |j
D r|d|  |dt||j	f  |j
D ]}|dt||j	||j
d j	tf  q|t t|dks|d	 tur|d n)|d|  |dt||j	f  |t t|j
D ]}|| q|d7 }t|dks|d | W  d   S 1 sw   Y  dS )aN  
        Print with newlines & indentation on each line

        Preterminals and nodes with all preterminal children go on their own line

        You can pass in your own normalize() function.  If you do,
        make sure the function updates the parens to be something
        other than () or the brackets will be broken
        Nc                 S   s   |  dd ddS )Nr	   -LRB-r   -RRB-replacer3   r
   r
   r   <lambda>z       z#Tree.pretty_print.<locals>.<lambda>r   r   
z  z	%s%s %s%sc                 s   r9   r   )r(   r1   r
   r
   r   r=      r>   z$Tree.pretty_print.<locals>.<genexpr>z%s%sz
 %s%s %s%sz%s%s
)r   r   appendr%   popCLOSE_PARENwriter(   
OPEN_PARENr!   r   r?   reversedseekread)r"   	normalizeindentbufstackr/   childr
   r
   r   pretty_printo   s\   




(

*



&&zTree.pretty_printc           	         s  dt j}|dkrt j}dn|r|d dkrt j}|d n|dkr't j}n|r7|d dkr7t j}|d nt|dkr?t j}nl|rO|d dkrOt j}|d n\|ra|d dkrat j}|d d	k}nJ|r}t|d
kr}|d
 dkr}t j}|d |d d	k}n.|dkrt j}n&|rt|d
kr|d
 dkrt j}|d n|r|d tjd	dd |t jkrdnd |t jkrdnd fdd}|t ju r| 
|S t o}t }|t jkr|r|d	| j n|d t| jdkrtdt| jd
kr
tdt| j || jd  nZ|t jkrh|d t| jdkr*tdt| jd
krSt| jd jdkrS|d ||| jd j |d n| jdkrb|| jd  n||  n||  t|dkr'| }t|tr|| qmt|jdkr|jdur|||j qm|t ju r| r|||jd j qm|d||j  |d nH|t ju s|t ju r|t |jdur|||j |t n#|t ju r|d t||jf  |td ||j  |t t|jD ]}|| |t qt|dkst|t jkr2|d! |d | W  d   S 1 sFw   Y  dS )"aT  
        Turn the tree into a string representing the tree

        Note that this is not a recursive traversal
        Otherwise, a tree too deep might blow up the call stack

        There is a type specific format:
          O       -> one line PTB format, which is the default anyway
          L       -> open and close brackets are labeled, spaces in the tokens are replaced with _
          P       -> pretty print over multiple lines
          V       -> surround lines with <s>...</s>, don't print ROOT, and turn () into L/RBKT
          ?       -> spaces in the tokens are replaced with ? for any value of ? other than OLP
                     warning: this may be removed in the future
          ?{OLPV} -> specific format AND a custom space replacement
          Vi      -> add an ID to the <s> in the V format.  Also works with ?Vi
        r   L_rH   r   OPVir   Tz]Use of a custom replacement without a format specifier is deprecated.  Please use {}O insteadr   )
stacklevelLBKTr@   RBKTrA   c                    s   |  d d  dS )Nr   r	   r   rB   )textLRBRRBspace_replacementr
   r   rQ      s   z"Tree.__format__.<locals>.normalizez
<s id={}>
z<s>
z(Cannot print an empty tree with V formatz2Cannot print a tree with %d branches with V formatz\Tree z(Cannot print an empty tree with T formatz[.? z ]ROOTNz[.%sz%s_%sz
</s>)r   r   r   r   r   r%   r   warningswarnformatrV   r   r   rL   tree_idr   r)   rI   r!   rJ   r   strr(   rM   rK   SPACE_SEPARATORrN   rO   rP   )	r"   specprint_formatuse_tree_idrQ   rS   rT   r/   rU   r
   rb   r   
__format__   s   








&








&zTree.__format__c                 C   s
   d | S )N{})ri   r&   r
   r
   r   __repr__!  s   
zTree.__repr__c                 C   sf   | |u rdS t |tsdS | j|jkrdS t| jt|jkr!dS tdd t| j|jD r1dS dS )NTFc                 s   s    | ]	\}}||kV  qd S r   r
   )r2   c1c2r
   r
   r   r=   -  s    zTree.__eq__.<locals>.<genexpr>)r   r   r!   r%   r   anyzip)r"   otherr
   r
   r   __eq__$  s   
zTree.__eq__c                 C   s"   | j sdS dtdd | j D  S )Nr   r   c                 s   r9   r   )depthr1   r
   r
   r   r=   4  r>   zTree.depth.<locals>.<genexpr>)r   maxr&   r
   r
   r   ry   1  s   z
Tree.depthc                 C   sV   |   r|r
||  n|  r|r||  n|r||  | jD ]	}|||| qdS )a-  
        Visit the tree in a preorder order

        Applies the given functions to each node.
        internal: if not None, applies this function to each non-leaf, non-preterminal node
        preterminal: if not None, applies this functiion to each preterminal
        leaf: if not None, applies this function to each leaf

        The functions should *not* destructively alter the trees.
        There is no attempt to interpret the results of calling these functions.
        Rather, you can use visit_preorder to collect stats on trees, etc.
        N)r'   r(   r   visit_preorder)r"   internalpreterminalleafrU   r
   r
   r   r{   6  s   
zTree.visit_preorderc                 C   s*   t | tr| g} t| }tt| S )zi
        Walks over all of the trees and gets all of the unique constituent names from the trees
        )r   r   get_constituent_countssortedsetkeys)treesconstituentsr
   r
   r   get_unique_constituent_labelsO  s   

z"Tree.get_unique_constituent_labelsc                    s8   t | tr| g} t  | D ]}|j fddd q S )zo
        Walks over all of the trees and gets the count of the unique constituent names from the trees
        c                         | jgS r   updater!   rD   r   r
   r   rE   c      z-Tree.get_constituent_counts.<locals>.<lambda>)r|   )r   r   r   r{   r   treer
   r   r   r   Y  s   
zTree.get_constituent_countsc                    <   t | tr| g} t  | D ]}|j fddd qt S )z\
        Walks over all of the trees and gets all of the unique tags from the trees
        c                         | jS r   addr!   rD   tagsr
   r   rE   p      z&Tree.get_unique_tags.<locals>.<lambda>)r}   r   r   r   r{   r   r   r
   r   r   get_unique_tagsf     
zTree.get_unique_tagsc                    r   )z]
        Walks over all of the trees and gets all of the unique words from the trees
        c                    r   r   r   rD   r6   r
   r   rE   }  r   z'Tree.get_unique_words.<locals>.<lambda>r~   r   r   r
   r   r   get_unique_wordss  r   zTree.get_unique_wordsc                    s`   |dkrt  S t| tr| g} t  | D ]}|j fddd qtdd   d| D S )z[
        Walks over all of the trees and gets the most frequently occurring words.
        r   c                    r   r   r   rD   r   r
   r   rE     r   z'Tree.get_common_words.<locals>.<lambda>r   c                 s       | ]}|d  V  qdS r   Nr
   r1   r
   r
   r   r=     r>   z(Tree.get_common_words.<locals>.<genexpr>N)r   r   r   r   r{   r   most_common)r   	num_wordsr   r
   r   r   get_common_words  s   
zTree.get_common_words皙?c                    sp   t | tr| g} t  | D ]}|j fddd qttt | d}tdd   d| d d D S )	z
        Walks over all of the trees and gets the least frequently occurring words.

        threshold: choose the bottom X percent
        c                    r   r   r   rD   r   r
   r   rE     r   z%Tree.get_rare_words.<locals>.<lambda>r   r   c                 s   r   r   r
   r1   r
   r
   r   r=     r>   z&Tree.get_rare_words.<locals>.<genexpr>NrH   )	r   r   r   r{   rz   intr%   r   r   )r   	thresholdr   r
   r   r   get_rare_words  s   
&zTree.get_rare_wordsc                 C   s   t tdd | D S )Nc                 s       | ]}|j V  qd S r   r!   r1   r
   r
   r   r=         z'Tree.get_root_labels.<locals>.<genexpr>)r   r   )r   r
   r
   r   get_root_labels  s   zTree.get_root_labelsFc                 C   s   t  }t }| D ]r}|r||jf |jD ]}|| qn|| t|dkrz| }| s6|	 r7q$|jg}t|jdkrb|jd 	 sb|jd }||j t|jdkrb|jd 	 rI|t
| |jD ]}|| qlt|dks*qt|S )Nr   r   )r   r   r   r!   r   rI   r%   rJ   r'   r(   r    r   )r   separate_rootr   rT   r   rU   r/   labelsr
   r
   r   get_compound_constituents  s0   



zTree.get_compound_constituentsc                    sR   | j }|r|  st|dkr|dvr |d } fdd| jD }t||S )zn
        Return a copy of the tree with the -=# removed

        Leaves the text of the leaves alone.
        r   )r@   rA   r   c                       g | ]}|  qS r
   )simplify_labelsr2   rU   patternr
   r   r4         z(Tree.simplify_labels.<locals>.<listcomp>)r!   r'   r%   splitr   r   )r"   r   	new_labelnew_childrenr
   r   r   r     s
    
zTree.simplify_labelsc                 C   s2   |   r	t| jS dd t| jD }t| j|S )z
        Flip a tree backwards

        The intent is to train a parser backwards to see if the
        forward and backwards parsers can augment each other
        c                 S      g | ]}|  qS r
   )reverser   r
   r
   r   r4     rF   z Tree.reverse.<locals>.<listcomp>)r'   r   r!   rN   r   r"   r   r
   r
   r   r     s   
zTree.reversec                    s\   |   r	t| jS |  rt| jt| jd jS  | j| j}t| fdd| jD S )
        Copies the tree with some labels replaced.

        Labels in the map are replaced with the mapped value.
        Labels not in the map are unchanged.
        r   c                    r   r
   )remap_constituent_labelsr   	label_mapr
   r   r4     r   z1Tree.remap_constituent_labels.<locals>.<listcomp>)r'   r   r!   r(   r   get)r"   r   r   r
   r   r   r     s   
zTree.remap_constituent_labelsc                    s\   |   r | j| j}t|S |  r t| j| jd  S t| j fdd| jD S )r   r   c                    r   r
   )remap_wordsr   word_mapr
   r   r4     r   z$Tree.remap_words.<locals>.<listcomp>)r'   r   r!   r   r(   r   r   )r"   r   r   r
   r   r   r     s   zTree.remap_wordsc                    s<   t | fdd  | }tdd D rtd|S )zs
        Replace all leaf words with the words in the given list (or iterable)

        Returns a new tree
        c                    sF   |   rtd }|d u rtdt|S t| j fdd| jD S )Nz&Not enough words to replace all leavesc                    s   g | ]} |qS r
   r
   r1   )recursive_replace_wordsr
   r   r4      rF   zGTree.replace_words.<locals>.recursive_replace_words.<locals>.<listcomp>)r'   r+   r)   r   r!   r   )subtreewordr   word_iteratorr
   r   r     s   
z3Tree.replace_words.<locals>.recursive_replace_wordsc                 s       | ]}d V  qdS TNr
   r2   rX   r
   r
   r   r=         z%Tree.replace_words.<locals>.<genexpr>z!Too many words for the given tree)r*   ru   r)   )r"   r6   new_treer
   r   r   replace_words  s   zTree.replace_wordsc                 C   s   |   rtdt|trdd | D }nt|}t| }t }|	| t
|dkre| }| rLzt|}W n tyG   tdw ||_n|  rWtd| |t|j t
|dks.tdd |D rrtd|S )	Nz$Must call replace_tags with non-leafc                 s   r   r   r   r1   r
   r
   r   r=     r   z$Tree.replace_tags.<locals>.<genexpr>r   z*Not enough tags in sentence for given treezGot a badly structured tree: {}c                 s   r   r   r
   r   r
   r
   r   r=   !  r   z Too many tags for the given tree)r'   r)   r   r   r0   r*   copydeepcopyr   rI   r%   rJ   r(   r+   StopIterationr!   ri   extendrN   r   ru   )r"   r   tag_iteratorr   queue	next_noder!   r
   r
   r   replace_tags  s0   


zTree.replace_tagsc                 C   s   |   r	t| jS |  r(| jdks| jd jtv rdS t| jt| jd jS dd | jD }dd |D }t|dkr?dS t| j|S )a  
        Return a copy of the tree, eliminating all nodes which are in one of two categories:
            they are a preterminal -NONE-, such as appears in PTB
              *E* shows up in a VLSP dataset
            they have been pruned to 0 children by the recursive call
        z-NONE-r   Nc                 S   r   r
   )
prune_noner   r
   r
   r   r4   5  rF   z#Tree.prune_none.<locals>.<listcomp>c                 S   s   g | ]}|d ur|qS r   r
   r   r
   r
   r   r4   6  r5   )r'   r   r!   r(   r   WORDS_TO_PRUNEr%   r   r
   r
   r   r   '  s   
zTree.prune_nonec                 C   s   |   s|  r
dS t| jdkrM| }d}|  s<| s<t|jdkr<|d }|jd }|  s<| s<t|jdks$tdd |jD }t||}|S tdd | jD }|S )Nr   r   c                 s   r9   r   count_unary_depth)r2   tcr
   r
   r   r=   D  r>   z)Tree.count_unary_depth.<locals>.<genexpr>c                 s   r9   r   r   r;   r
   r
   r   r=   G  r>   )r(   r'   r%   r   rz   )r"   r<   scorechild_scorer
   r
   r   r   ;  s   

zTree.count_unary_depthrq   c                 C   sX   t |ddd}| D ]}||| |d q
W d    d S 1 s%w   Y  d S )Nwzutf-8)encodingrG   )openrL   ri   )r   out_filefmtfoutr   r
   r
   r   write_treebankJ  s   "zTree.write_treebank)NNr   )NNN)r   )F)rq   )$r   r   r   r   r#   r'   r(   r0   r7   r8   r:   rV   rp   rr   rx   ry   r{   staticmethodr   r   r   r   r   r   r   r   CONSTITUENT_SPLITr   r   r   r   r   r   r   r   r   r
   r
   r
   r   r   .   sR    



:x

	




r   )r   collectionsr   r   r   enumr   ior   r,   rerg   "stanza.models.common.stanza_objectr   rK   rl   rM   r   compiler   r   r   r   r
   r
   r
   r   <module>   s"    
