o
    hS9                     @   sN  d Z ddlZddlmZ ddlZddlZddlZddlZddlm	Z	m
Z
 ddlmZmZ ddlmZ ddlmZ ddlmZmZmZmZmZmZmZmZmZmZmZmZmZm Z m!Z!m"Z" d	Z#ed
dg ddZ$dd Z%dd Z&dd Z'dd Z(d&ddZ)dd Z*d&ddZ+dd Z,dd Z-G d d! d!ej.Z/d"Z0d#d$ Z1e2d%kre1  dS dS )'zInvokes the Java ssurgeon on a document

"ssurgeon" sends text to Java CoreNLP for processing with a ssurgeon
(Semantic graph SURGEON) query

The main program in this file gives a very short intro to how to use it.
    N)
namedtuple)misc_to_space_afterspace_after_to_misc)SsurgeonRequestSsurgeonResponse)java_protobuf_requests)CoNLL)IDTEXTLEMMAUPOSXPOSFEATSHEADDEPRELDEPSMISC
START_CHAREND_CHARNERWordTokenSentencezAedu.stanford.nlp.semgraph.semgrex.ssurgeon.ProcessSsurgeonRequestSsurgeonEditz9semgrex_pattern ssurgeon_edits ssurgeon_id notes language)NNUniversalEnglish)defaultsc           
   	   C   s   |   } td| }g }t|D ];\}}|d}dd |D }d|}dd |D }t|dkr3q|d }|dd  }	|t||	d	|d  | q|S )
Nz

+
c                 S   s&   g | ]}| d r|dd  qS )#   N)
startswithstrip).0line r#   Q/var/www/html/env_mimamsha/lib/python3.10/site-packages/stanza/server/ssurgeon.py
<listcomp>$      & z(parse_ssurgeon_edits.<locals>.<listcomp> c                 S   s&   g | ]}|  r|d s|  qS )r   )r    r   )r!   xr#   r#   r$   r%   &   r&   r   r   z%d)r    resplit	enumeratejoinlenappendr   )
ssurgeon_textssurgeon_blocksssurgeon_editsidxblocklinescommentsnotessemgrexssurgeonr#   r#   r$   parse_ssurgeon_edits   s   

r9   c                 C   s<   t | dd}t| W  d    S 1 sw   Y  d S )Nutf-8encoding)openr9   read)	edit_filefinr#   r#   r$   read_ssurgeon_edits/   s   
$rA   c                 C   s   t | ttS N)r   send_requestr   SSURGEON_JAVA)requestr#   r#   r$   send_ssurgeon_request3   s   rF   c              
   C   s  t  }|D ]2}|j }|j|_|jD ]}|j| q|jd ur%|j|_	|j
d ur.|j
|_
|jd ur7|j|_qz4t| jD ]+\}}|j }d}	|jD ]}
|
jD ]}t|j||
 t||||	 |	d }	qQqLq>W |S  ty } z	td|||d }~ww )Nr   r   z#Failed to process sentence {}:
{:C})r   r8   addsemgrex_patternr7   r1   	operationr.   ssurgeon_ididr6   languager+   	sentencesgraphtokenswordsr   	add_tokentokenadd_word_to_graph	ExceptionRuntimeErrorformat)docr1   rE   r8   ssurgeon_protorI   sent_idxsentencerN   word_idxrR   worder#   r#   r$   build_request6   s<   








r^   c                 C   s   t ||||}t| |gS rB   )r   r^   )rW   rH   r1   rJ   r6   ssurgeon_editr#   r#   r$   build_request_one_operationT   s   r`   c                 C   s   t | |}t|S )z
    Returns the result of processing the given semgrex expression and ssurgeon edits on the stanza doc.

    Currently the return is a SsurgeonResponse from CoreNLP.proto
    )r^   rF   )rW   r1   rE   r#   r#   r$   process_docX   s   
ra   c                 C   s   t | ||||}t|S rB   )r`   rF   )rW   rH   r1   rJ   r6   rE   r#   r#   r$   process_doc_one_operationb   s   rb   c                 C   s   t | t|jr	|jnd t|jr|jnd t|jr|jnd t|jr!|jnd t	t
|jtd t|jr1|jnd td td td d|jd|jd|jd|ji}t|j|t< |jr[t
|j|t |t< |S )Nis_mwtis_first_mwtmwt_textmwt_misc)r	   r
   r\   r   lemmar   	coarseTagr   posr   r   features_to_stringconllUFeaturesr   r   nerr   r   r   isMWT
isFirstMWTmwtTextmwtMiscr   after
conllUMiscsubstitute_space_misc)
word_index
graph_word
word_entryr#   r#   r$   build_word_entryg   s(   rw   c              
   C   s&  t | } zutt| j|jD ]g\}\}}|j}g }t|j|jD ]\}}t	|j
|}	||	 q#|jdd d |jD ]}
d||
d  t< d||
d  t< q>|jD ]}|jrZqT|j||jd  t< |j||jd  t< qTg }t|D ]\}}|d s|d rt|t |t< || qu|d }|t|k r|| d r|| d s|d7 }|t|k r|| d r|| d rt|| t ||d  t ft|d	 t|t td i}t||d  t |t< ||d  d
 rt||d  d
 |t |t< t|t |t< || || qut|j}t || }g }t|j!D ]\}}||j" |t|j!d kr6 n||j# q d$|}|D ])}|%ds_|%ds_|%ds_|%drh|&d|  qE|&| qE|| j|< |'  qW | S  t(y } z
t)d*||||d }~ww )Nc                 S   s   | t  S rB   )r	   )r(   r#   r#   r$   <lambda>   s    z)convert_response_to_doc.<locals>.<lambda>)keyr   r   rootrd   rc   re   rf    z# text z#text z# text=z#text=z	# text = zRSsurgeon could not process sentence {}
Ssurgeon result:
{}
Original sentence:
{:C})+copydeepcopyr+   ziprM   resultrN   noderR   rw   indexr.   sortrz   r   r   edgeisExtrasourcetargetdepr   remove_space_miscr   r-   r	   r
   r   misc_space_piecesrs   listr5   r   rO   textspaces_afterr,   r   add_commentrebuild_dependenciesrT   rU   rV   )rW   semgrex_responserY   rZ   ssurgeon_resultssurgeon_graphrO   
graph_noderu   rv   rz   r   
mwt_tokensword_start_idxr\   word_end_idxmwt_token_entryold_comments
token_text	token_idxrR   sentence_textcommentr]   r#   r#   r$   convert_response_to_doc   sx   
 


$$ 



0

Hr   c                       s4   e Zd ZdZd	 fdd	Zdd Zd
ddZ  ZS )Ssurgeonz
    Ssurgeon context window

    This is a context window which keeps a process open.  Should allow
    for multiple requests without launching new java processes each time.
    Nc                    s   t t| |tt d S rB   )superr   __init__r   rD   )self	classpath	__class__r#   r$   r      s   zSsurgeon.__init__c                 C   s   t ||}| |S )z\
        Apply each of the ssurgeon patterns to each of the dependency trees in doc
        )r^   process_request)r   rW   r1   rE   r#   r#   r$   process   s   

zSsurgeon.processc                 C   s   t |||||}| |S )zI
        Convenience method - build one operation, then apply it
        )r`   r   )r   rW   rH   r1   rJ   r6   rE   r#   r#   r$   process_one_operation   s   
zSsurgeon.process_one_operationrB   NN)__name__
__module____qualname____doc__r   r   r   __classcell__r#   r#   r   r$   r      s
    r   a  
# sent_id = 271
# text = Hers is easy to clean.
# previous = What did the dealer like about Alex's car?
# comment = extraction/raising via "tough extraction" and clausal subject
1	Hers	hers	PRON	PRP	Gender=Fem|Number=Sing|Person=3|Poss=Yes|PronType=Prs	3	nsubj	_	_
2	is	be	AUX	VBZ	Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin	3	cop	_	_
3	easy	easy	ADJ	JJ	Degree=Pos	0	root	_	_
4	to	to	PART	TO	_	5	mark	_	_
5	clean	clean	VERB	VB	VerbForm=Inf	3	csubj	_	SpaceAfter=No
6	.	.	PUNCT	.	_	5	punct	_	_
c               	      sJ  z	t jjdd W n	 ty   Y nw t } | jdtd dd | jdtd dd | jdtd d	d | jd
tddd | jddd dd | jdtd dd | jdtd dd | jdtddd | jdtdgddd | jdddd d!d" | jd#dd$d%d& |    j	rt
 j	}nt j jg} jrtj jd'g} jg}t||}n0 jrȈ jstd(tj jst j  fd)d*}| }ntjtd+g}d g}t||}|D ]I\}} jrtd,| t||}t|}	t ||	}
|d urt!|d-dd}|"d.|
 W d    n	1 sw   Y  qtd/|
 qd S )0Nr:   r;   z--input_filez<Input file to process (otherwise will process a sample text))typedefaulthelpz--output_filez,Output file (otherwise will write to stdout)z--input_dirzSInput dir to process instead of a single file.  Allows for reusing the Java programz--input_filterz.*[.]conlluzmOnly process files from the input_dir that match this filter - regex, not shell filter.  Default: %(default)sz--no_input_filterstore_constz(Remove the default input filename filter)actionconstr   z--output_dirz<Output dir for writing files, necessary if using --input_dirz--edit_filez+File to get semgrex and ssurgeon rules fromz	--semgrexz!{}=source >nsubj {} >csubj=bad {}zrSemgrex to apply to the text.  A default detects words which have both an nsubj and a csubj.  Default: %(default)sr8   z&relabelNamedEdge -edge bad -reln advcl*zSsurgeon edits to apply based on the Semgrex.  Can have multiple edits in a row.  A default exists to transform csubj into advcl.  Default: %(default)s)r   r   nargsr   z--print_inputprint_input
store_trueFzPPrint the input alongside the output - gets kind of noisy.  Default: %(default)s)destr   r   r   z--no_print_inputstore_falsez?Don't print the input alongside the output - gets kind of noisy)r   r   r   
input_filezvCannot process multiple files without knowing where to send them - please set --output_dir in order to use --input_dirc                  3   sp    t  jD ].}  jrt j| sqt j j| }t j j| }t	d||f  t
j|d|fV  qd S )NzProcessing %s to %sr   )oslistdir	input_dirinput_filterr)   matchpathr,   
output_dirprintr   	conll2doc)doc_filenamedoc_pathoutput_pathargsr#   r$   	read_docs  s   zmain.<locals>.read_docs)	input_strz{:C}wz{:C}

z{:C}
)#sysstdoutreconfigureAttributeErrorargparseArgumentParseradd_argumentstr
parse_argsr?   rA   r   r7   r8   r   r   r   output_filer~   r   r   
ValueErrorr   r   existsmakedirs
SAMPLE_DOCr   r   rV   r^   rF   r   r=   write)parserr1   docsoutputsinput_outputr   rW   outputssurgeon_requestssurgeon_responseupdated_docfoutr#   r   r$   main   s`   	



r   __main__r   )3r   r   collectionsr   r|   r   r)   r   stanza.models.common.utilsr   r   stanza.protobufr   r   stanza.serverr   stanza.utils.conllr   stanza.models.common.docr	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rD   r   r9   rA   rF   r^   r`   ra   rb   rw   r   JavaProtobufContextr   r   r   r   r#   r#   r#   r$   <module>   s@    	H


MB
