o
    hb                     @   s   d Z ddlmZmZ ddlZddlmZ ddlmZ ddl	Z	ddl
Z
ddlmZ e
dZG dd	 d	eZe	jG d
d deZG dd deZG dd deZG dd dZdd ZG dd deZG dd deZG dd deZdd ZdS )z
Defines a series of transitions (open a constituent, close a constituent, etc

Also defines a State which holds the various data needed to build
a parse tree out of tagged words.
    )ABCabstractmethodN)defaultdict)Enum)Treestanzac                   @   s,   e Zd Zdd ZdZdZdZdZdZdZ	d	S )
TransitionSchemec                 C   s   t | }||_||_|S N)object__new___value_
short_name)clsvaluer   obj r   g/var/www/html/env_mimamsha/lib/python3.10/site-packages/stanza/models/constituency/parse_transitions.pyr      s   
zTransitionScheme.__new__)   top)   topc)   topu)   in)   inc)   inuN)
__name__
__module____qualname__r   TOP_DOWNTOP_DOWN_COMPOUNDTOP_DOWN_UNARYIN_ORDERIN_ORDER_COMPOUNDIN_ORDER_UNARYr   r   r   r   r      s    r   c                   @   sh   e Zd ZdZedd Zdd Zdd Zedd	 Zd
d Z	edd Z
dd Zdd Zedd ZdS )
Transitionz
    model is passed in as a dependency injection
    for example, an LSTM model can update hidden & output vectors when transitioning
    c                 C      dS )a  
        update the word queue position, possibly remove old pieces from the constituents state, and return the new constituent

        the return value should be a tuple:
          updated word_position
          updated constituents
          new constituent to put on the queue and None
            - note that the constituent shouldn't be on the queue yet
              that allows putting it on as a batch operation, which
              saves a significant amount of time in an LSTM, for example
          OR
          data used to make a new constituent and the method used
            - for example, CloseConstituent can return the children needed
              and itself.  this allows a batch operation to build
              the constituent
        Nr   selfstatemodelr   r   r   update_stateC       zTransition.update_statec                 C   r)   Nr   r   r+   r   r   r   delta_opensV      zTransition.delta_opensc                 C   s   | |g| g}|d S )z
        return a new State transformed via this transition

        convenience method to call bulk_apply, which is significantly
        faster than single operations for an NN based model
        r   )
bulk_apply)r+   r,   r-   updater   r   r   applyY   s   zTransition.applyc                 C   r)   )z
        assess whether or not this transition is legal in this state

        at parse time, the parser might choose a transition which cannot be made
        Nr   r*   r   r   r   is_legalc   r/   zTransition.is_legalc                 C   s   | gS )z
        Return a list of transitions which could theoretically make up this transition

        For example, an Open transition with multiple labels would
        return a list of Opens with those labels
        r   r1   r   r   r   
componentsk   s   zTransition.componentsc                 C   r)   )z:
        A short name to identify this transition
        Nr   r1   r   r   r   r   t   r/   zTransition.short_namec                 C   sV   t | ds	|  S t| jtr| j}nt| jdkr | jd }n| j}d|  |S )Nlabelr   r   z{}({}))hasattrr   
isinstancer9   strlenformatr+   r9   r   r   r   short_labelz   s   
zTransition.short_labelc                 C   s8   | |krdS t | trdS t |trdS t| t|k S )NFT)r;   Shiftr<   r+   otherr   r   r   __lt__   s   

zTransition.__lt__c                 C   s   | dkrt  S | dkrt S | jddd}|d dvr!td|  t|dkr/td	|d  |d d
 dkrCtd|d |d f |d }|d dd
 }t|}|dkr\t| S |dkrdt| S |dkrlt	| S td|  )z
        This method is to avoid using eval() or otherwise trying to
        deserialize strings in a possibly untrusted manner when
        loading from a checkpoint
        rA   CloseConstituent(r   )maxsplitr   )CompoundUnaryOpenConstituentFinalizezUnknown Transition %sz+Unexpected Transition repr, %s needs labels)z+Expected Transition repr for %s: %s(labels)NrH   rI   rJ   zUnexpected Transition %s)
rA   rE   split
ValueErrorr=   astliteral_evalrH   rI   rJ   )desclabels
trans_typer   r   r   	from_repr   s*   
zTransition.from_reprN)r   r    r!   __doc__r   r.   r2   r6   r7   r8   r   r@   rD   staticmethodrT   r   r   r   r   r(   =   s    


	
r(   c                   @   s<   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd ZdS )rA   c                 C   s   | |}|jd |j|dfS )z
        This will handle all aspects of a shift transition

        - push the top element of the word queue onto constituents
        - pop the top element of the word queue
        r   N)transform_word_to_constituentword_positionconstituents)r+   r,   r-   new_constituentr   r   r   r.      s   
zShift.update_statec                 C   s   |  rdS |jr;|jdkrdS |jdkr9|jjdusJ |jjjdu r9||j}t|jdkr9|j|j	v r9dS dS |jdkrE|j
sEdS dS )zr
        Disallow shifting when the word queue is empty or there are no opens to eventually eat this word
        Fr   r   NT)empty_word_queueis_top_down	num_openstransitionsparentget_top_transitionr=   r9   	top_labelroot_labelsempty_constituents)r+   r,   r-   transr   r   r   r7      s    



zShift.is_legalc                 C   r)   NrA   r   r1   r   r   r   r      r3   zShift.short_namec                 C   r)   re   r   r1   r   r   r   __repr__   r3   zShift.__repr__c                 C      | |u rdS t |trdS dS NTF)r;   rA   rB   r   r   r   __eq__   
   
zShift.__eq__c                 C      t dS )N%   hashr1   r   r   r   __hash__      zShift.__hash__N)	r   r    r!   r.   r7   r   rf   ri   ro   r   r   r   r   rA      s    
'rA   c                   @   sL   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dS )rH   c                 G      t || _d S r	   tupler9   r?   r   r   r   __init__   s   zCompoundUnary.__init__c                 C   s*   |j }|jg}| }|j|| j|ftfS )z
        Apply potentially multiple unary transitions to the same preterminal

        It reuses the CloseConstituent machinery
        )rY   r   poprX   r9   rE   )r+   r,   r-   rY   childrenr   r   r   r.      s   zCompoundUnary.update_statec                 C   s   | |j}|du rdS t||jttfrdS | tj	u r$|
 S | tjur-dS | jd |jv }| r=| s@| S |S )zf
        Disallow consecutive CompoundUnary transitions, force final transition to go to ROOT
        NFTr   )get_top_constituentrY   r;   r`   r^   rH   rI   transition_schemer   r&   is_preterminalr$   r9   rb   r[   has_one_constituent)r+   r,   r-   treeis_rootr   r   r   r7     s   zCompoundUnary.is_legalc                 C      dd | j D S )Nc                 S      g | ]}t |qS r   )rH   .0r9   r   r   r   
<listcomp>       z,CompoundUnary.components.<locals>.<listcomp>r9   r1   r   r   r   r8        zCompoundUnary.componentsc                 C   r)   )NUnaryr   r1   r   r   r   r   "  r3   zCompoundUnary.short_namec                 C      dd | j S )NzCompoundUnary(%s),joinr9   r1   r   r   r   rf   %  r   zCompoundUnary.__repr__c                 C   .   | |u rdS t |tsdS | j|jkrdS dS rh   )r;   rH   r9   rB   r   r   r   ri   (     
zCompoundUnary.__eq__c                 C   
   t | jS r	   rn   r9   r1   r   r   r   ro   1     
zCompoundUnary.__hash__N)r   r    r!   rt   r.   r7   r8   r   rf   ri   ro   r   r   r   r   rH      s    	rH   c                   @   s@   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d Zdd Z	dS )Dummyz_
    Takes a space on the constituent stack to represent where an Open transition occurred
    c                 C   s
   || _ d S r	   r   r?   r   r   r   rt   8  r   zDummy.__init__c                 C   r)   )NFr   r1   r   r   r   ry   ;  r3   zDummy.is_preterminalc                 C   s@   |d u s|dks|dkrd| j  S |dkrd| j  S td| )N Oz(%s ...)Tz\Tree [.%s ? ]zUnhandled spec: %s)r9   rN   )r+   specr   r   r   
__format__>  s
   

zDummy.__format__c                 C      d | jS )Nz	Dummy({})r>   r9   r1   r   r   r   __str__E     zDummy.__str__c                 C   r   rh   )r;   r   r9   rB   r   r   r   ri   H  r   zDummy.__eq__c                 C   r   r	   r   r1   r   r   r   ro   Q  r   zDummy.__hash__N)
r   r    r!   rU   rt   ry   r   r   ri   ro   r   r   r   r   r   4  s    	r   c                 C   s@   | du rdS t |d D ]}t| jdkr dS | jd } qdS )z
    Return True iff there are UNARY_LIMIT unary nodes in a tree in a row

    helps prevent infinite open/close patterns
    otherwise, the model can get stuck in essentially an infinite loop
    NFr   r   T)ranger=   rv   )r{   unary_limit_r   r   r   too_many_unary_nodesT  s   r   c                   @   sT   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dd ZdS )rI   c                 G   s   t || _| jd | _d S r0   )rs   r9   ra   r?   r   r   r   rt   d  s   
zOpenConstituent.__init__c                 C   r)   )Nr   r   r1   r   r   r   r2   h  r3   zOpenConstituent.delta_opensc                 C   s   |j |j|t| jd fS r	   )rX   rY   dummy_constituentr   r9   r*   r   r   r   r.   k  s   zOpenConstituent.update_statec                 C   s   |j |jd kr
dS |jr*| rdS | s(| j|jv }|r#| S |  S dS |jr/dS t	|
|jtr:dS | tjurF|  S | j|jv }|rW|j dkoV| S |j dks`| rmt||j| rmdS dS )z>
        disallow based on the length of the sentence
        
   Fr   T)r]   sentence_lengthr\   r[   has_unary_transitionsra   rb   empty_transitionsrc   r;   r`   r^   rI   rx   r   r%   r   rw   rY   r   )r+   r,   r-   r|   r   r   r   r7   p  s.   
,
(	zOpenConstituent.is_legalc                 C   r}   )Nc                 S   r~   r   )rI   r   r   r   r   r     r   z.OpenConstituent.components.<locals>.<listcomp>r   r1   r   r   r   r8     r   zOpenConstituent.componentsc                 C   r)   )NOpenr   r1   r   r   r   r     r3   zOpenConstituent.short_namec                 C   r   )NzOpenConstituent({})r   r1   r   r   r   rf     r   zOpenConstituent.__repr__c                 C   r   rh   )r;   rI   r9   rB   r   r   r   ri     r   zOpenConstituent.__eq__c                 C   r   r	   r   r1   r   r   r   ro     r   zOpenConstituent.__hash__N)r   r    r!   rt   r2   r.   r7   r8   r   rf   ri   ro   r   r   r   r   rI   c  s    D	rI   c                   @   sH   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d Zdd Z	dd Z
dS )rJ   z
    Specifically applies at the end of a parse sequence to add a ROOT

    Seemed like the simplest way to remove ROOT from the
    in_order_compound transitions while still using the mechanism of
    the transitions to build the parse tree
    c                 G   rq   r	   rr   r?   r   r   r   rt        zFinalize.__init__c                 C   s.   |j }|jg}| }| j}|j|||ftfS )z
        Apply potentially multiple unary transitions to the same preterminal

        Only applies to preterminals
        It reuses the CloseConstituent machinery
        )rY   r   ru   r9   rX   rE   )r+   r,   r-   rY   rv   r9   r   r   r   r.     s
   zFinalize.update_statec                 C   s   |  o| o|| S )zV
        Legal if & only if there is one tree, no more words, and no ROOT yet
        )r[   rz   finishedr*   r   r   r   r7     s   zFinalize.is_legalc                 C   r)   )NrJ   r   r1   r   r   r   r     r3   zFinalize.short_namec                 C   r   )NzFinalize(%s)r   r   r1   r   r   r   rf     r   zFinalize.__repr__c                 C   s&   | |u rdS t |tsdS |j| jkS rh   )r;   rJ   r9   rB   r   r   r   ri     s
   
zFinalize.__eq__c                 C   s   t d| jfS )N5   r   r1   r   r   r   ro     r   zFinalize.__hash__N)r   r    r!   rU   rt   r.   r7   r   rf   ri   ro   r   r   r   r   rJ     s    rJ   c                   @   sP   e Zd Zdd Zdd Zedd Zdd Zd	d
 Zdd Z	dd Z
dd ZdS )rE   c                 C   r)   )NrK   r   r1   r   r   r   r2     r3   zCloseConstituent.delta_opensc                 C   s   g }|j }t||ts||j | }t||tr||j}| }|js6||j | }|	  |j
|||ftfS r	   )rY   r;   rw   r   appendr   ru   r9   r\   reverserX   rE   )r+   r,   r-   rv   rY   r9   r   r   r   r.      s   zCloseConstituent.update_statec                 C   s"   t tt| \}}| ||}|S )a  
        builds new constituents out of the incoming data

        data is a list of tuples: (label, children)
        the model will batch the build operation
        again, the purpose of this batching is to do multiple deep learning operations at once
        )maplistzipbuild_constituents)r-   datarR   children_listsnew_constituentsr   r   r   r     s   	z#CloseConstituent.build_constituentsc                 C   s  |j dkrdS |jrEt||jtrdS |j dkr | s dS | tj	kr4|j dkr2| s2dS dS |
 sC|j dkrC| sCdS dS | tju rut||jtsWdS |j dks`| rbdS ||j }t|| rsdS dS t||jtrdS dS )z
        Disallow if there is no Open on the stack yet

        in TOP_DOWN, if the previous transition was the Open (nothing built yet)
        in IN_ORDER, previous transition does not matter, except for one small corner case
        r   Fr   r   T)r]   r\   r;   r`   r^   rI   r[   rx   r   r#   r   r%   rw   rY   ru   r   r   )r+   r,   r-   noder   r   r   r7   %  s6   
#zCloseConstituent.is_legalc                 C   r)   )NCloser   r1   r   r   r   r   ^  r3   zCloseConstituent.short_namec                 C   r)   )NrE   r   r1   r   r   r   rf   a  r3   zCloseConstituent.__repr__c                 C   rg   rh   )r;   rE   rB   r   r   r   ri   d  rj   zCloseConstituent.__eq__c                 C   rk   )N]   rm   r1   r   r   r   ro   k  rp   zCloseConstituent.__hash__N)r   r    r!   r2   r.   rV   r   r7   r   rf   ri   ro   r   r   r   r   rE     s    
9rE   c                 C   sl   t  }|D ]}|| vr#| D ]}|| vrtd||q|| qt|dkr4tdt| dS dS )a  
    Check that all the transitions in the other dataset are known in the train set

    Weird nested unaries are warned rather than failed as long as the
    components are all known

    There is a tree in VLSP, for example, with three (!) nested NP nodes
    If this is an unknown compound transition, we won't possibly get it
    right when parsing, but at least we don't need to fail
    zDFound transition {} in the {} set which don't exist in the train setr   zlFound transitions where the components are all valid transitions, but the complete transition is unknown: %sN)	setr8   RuntimeErrorr>   addr=   loggerwarningsorted)train_transitionsother_transitionstreebank_nameunknown_transitionsrd   	componentr   r   r   check_transitionsn  s   
r   )rU   abcr   r   rO   collectionsr   enumr   	functoolslogging%stanza.models.constituency.parse_treer   	getLoggerr   r   total_orderingr(   rA   rH   r   r   rI   rJ   rE   r   r   r   r   r   <module>   s(    
*pBD f3r