o
    h=                     @   s  d dl Z d dlZd dlZd dlm  mZ d dlm  m	  m
Z d dlZd dlmZ d dlmZ d dlT d dlmZ d dlT d dlmZ d dlmZ d dl Z d dlmZmZmZ d dlZd	eeef fd
dZd	ee fddZd	ee e!e!f fddZ"d	e fddZ#d	eeeee ee e!e!e f f fddZ$	d(dededee de de dede!de!de de d	dfdd Z%d!d" Z&d#d$ Z'd%d& Z(e)d'kre(  dS dS ))    N)visualize_search_str)edit_html_overflow)*)CoNLL)Pipeline)StringIO)ListTupleAnyreturnc                  C   s(   t jdttd} t jdttd}| |fS )z
    Gets user input for the Semgrex text and queries to process.

    @return: A tuple containing the user's input text and their input queries
    Text to analyzeplaceholder9Semgrex search queries (separate each query with a comma))st	text_areaDEFAULT_SAMPLE_TEXTDEFAULT_SEMGREX_QUERY)	input_txtinput_queries r   a/var/www/html/env_mimamsha/lib/python3.10/site-packages/stanza/utils/visualization/semgrex_app.pyget_semgrex_text_and_query   s   r   c                  C   sP   t d t jdddd} g }| D ]}t| d}| }|| q|S )z
    Allows user to submit files for analysis.

    @return: List of strings containing the file contents of each submitted file. The i-th element of res is the
    string representing the i-th file uploaded.
    z-**Alternatively, upload file(s) to analyze.**button_labelT	collapsedaccept_multiple_fileslabel_visibilityutf-8)r   markdownfile_uploaderr   getvaluedecodereadappend)uploaded_filesresfilestringiostring_datar   r   r   get_file_input(   s   
r*   c                  C   sJ   t jddd} d\}}| r t jdddd}t jd	d
d|d d}| ||fS )a9  
    Allows user to specify a specific window of Semgrex hits to visualize. Works similar to Python splicing.

    @return: A tuple containing a bool representing whether or not the user wants to visualize a splice of
    the visualizations, and two ints representing the start and end indices of the splice.
    z3Visualize a specific window of Semgrex search hits?zAIf you want to visualize all search results, leave this unmarked.help)NNz2Which search hit should visualizations start from?zDIf you want to visualize the first 10 search results, set this to 0.r   )r,   	min_valuez/Which search hit should visualizations stop on?zIf you want to visualize the first 10 search results, set this to 11.
                                     The 11th result will NOT be displayed.      )r,   valuer-   )r   checkboxnumber_input)show_windowstart_window
end_windowr   r   r   get_semgrex_window_input;   s$   
r6   c                  C   s   t jddd} | S )zP
    Prompts client for whether they want to see xpos tags instead of upos.
    z&Would you like to visualize xpos tags?zThe default visualization options use upos tags for part-of-speech labeling. If xpos tags aren't available for the sentence, displays upos.r+   )r   r1   )use_xposr   r   r   get_pos_inputW   s   r8   c                  C   s*   t  \} }t }t }t }| ||||fS )zH
    Tie together all inputs to query user for all possible inputs.
    )r   r*   r6   r8   )r   r   client_fileswindow_inputvisualize_xposr   r   r   	get_input`   s
   
r<   Tr   r   r9   r3   clickedpiper4   r5   r;   show_successc
                 C   s  |r| s|st d dS | r|rt d dS |s!t d dS zt d dd |dD }
|r^g dtd	}}}|rI|d
 |d
 }}|D ]}t||
d||||d}||7 }qKn|rpt| |
d|d
 |d
 ||d}nt| |
dtd	||d}t|dkrt d |D ]}t|}t	j
|dddd q|	rt|d
krt dt| d nt dt| d W d   W dS W d   W dS W d   W dS 1 sw   Y  W dS  ty   t d Y dS w dS )aL  
    Run Semgrex search on the input text/files with input query and serve the HTML on the app.

    @param input_txt: Text to analyze and draw sentences from.
    @param input_queries: Semgrex queries to parse the input with.
    @param client_files: Alternative to input text, we can parse the content of files for scaled analysis.
    @param show_window: Whether or not the user wants a splice of the visualizations
    @param clicked: Whether or not the button has been clicked to run Semgrex search
    @param pipe: NLP pipeline to process input with
    @param start_window: If displaying a splice of visualizations, this is the start idx
    @param end_window: If displaying a splice of visualizations, this is the end idx
    @param visualize_xpos: Set to true if using xpos tags for part of speech labels, otherwise use upos tags

    z9Please provide a text input or upload files for analysis.zQPlease only choose to visualize your input text or your uploaded files, not both.z(Please provide a set of Semgrex queries.Processing...c                 S   s   g | ]}|  qS r   )strip).0queryr   r   r   
<listcomp>   s    z'run_semgrex_process.<locals>.<listcomp>,r   infr/   en)start_match	end_matchr>   r;   )rI   r>   r;   No Semgrex match hits!     Theightwidth	scrollingzCompleted! Visualized z Semgrex search hit.z Semgrex search hits.NzQYour text input or your provided Semgrex queries are incorrect. Please try again.)r   errorspinnersplitfloatr   lenwriter   
componentshtmlsuccessOSError)r   r   r9   r3   r=   r>   r4   r5   r;   r?   querieshtml_stringsbegin_viz_idxend_viz_idxclient_fileclient_file_html_stringsss_no_overflowr   r   r   run_semgrex_processk   s   
	


57&>
rc   c            
      C   sh   t d d} t j| dd t \}}}}}|\}}}t jddd}	t|||||	t jd |||d		 d
S )aP  
    Contains the Semgrex portion of the webpage.

    This contains the markdown and calls to the processes which run when a query is made.

    When the `Load Semgrex search visualization` button is pressed, the function `run_semgrex_process`
    is called inside this function and the rendered visual is placed onto the webpage.
    zDisplaying Semgrex QuerieszE<h3>Enter a text below, along with your Semgrex query of choice.</h3>Tunsafe_allow_htmlz!Load Semgrex search visualizationzsSemgrex search visualizations only display 
    sentences with a query match. Non-matching sentences are not shown.r+   pipeline)	r   r   r9   r3   r=   r>   r4   r5   r;   N)r   titler   r<   buttonrc   session_state)
html_stringr   r   r9   r:   r;   r3   r4   r5   r=   r   r   r   semgrex_state   s*   


rk   c                  C   sP  t d t jdttd} t jdddd}t jdddd}t d t jd	d
dd}g }|D ]}t| d}|	 }|
| q.t d}t d}	|rzwt dh |}
|g}t| |
|}tj| d}ddd |jD }d}t j|d
d tt||
|dg t jd ddddd
 t|dkrt d |D ]}d}t j|d
d t|}tj|dd d
d! qW d"   n1 sw   Y  W n ty   t d# Y nw |	r&t|dkrt d$ t d%3 |d }tj|d}t|||g}t||}t|d }d&|}t jd'|d(d) W d"   d"S 1 sw   Y  d"S d"S )*af  
    Contains the ssurgeon state for the webpage.

    This contains the markdown and calls the processes that run Ssurgeon operations.

    When the text boxes, buttons, or other interactable features are edited by the user, this function
    runs with the updated page state and conducts operations (e.g. runs a Ssurgeon operation on a submitted file)
    zDisplaying Ssurgeon Resultsr   r   r   z!{}=source >nsubj {} >csubj=bad {}zSsurgeon commandsz&relabelNamedEdge -edge bad -reln advclz***Alternatively, upload file(s) to edit.** Tr   r   r   zLoad Ssurgeon visualizationz	Edit Filer@   )	input_str c                 S   s   g | ]}|j D ]}|jqqS r   )wordstext)rB   sentencewordr   r   r   rD   ;  s    z"ssurgeon_state.<locals>.<listcomp>z'<h3>Previous deprel visualization:</h3>rd   Frf   r/   r.   )
r   r   r=   r3   r9   r>   r4   r5   r;   r?   r   rJ   z%<h3>Edited deprel visualization:</h3>rK   rL   rM   NzZYour text input or your provided Semgrex/Ssurgeon queries are incorrect. Please try again.z$You must provide files for analysis.z
Editing...
zDownload your edited filezSSurgeon.conll)data	file_name) r   rg   r   SAMPLE_SSURGEON_DOCr   r    r   r!   r"   r#   r$   rh   rR   ssv,visualize_ssurgeon_deprel_adjusted_str_inputr   	conll2docjoin	sentencesrW   rX   rc   ri   rU   rV   r   rZ   rQ   process_doc_one_operationconvert_response_to_doc	doc2conlldownload_button)r   semgrex_input_queriesssurgeon_input_queriesr%   r&   r'   r(   r)   r=   clicked_for_file_editsemgrex_queriesssurgeon_queriesr\   doc
string_txtrj   ra   rb   single_filessurgeon_responseupdated_docoutput
output_strr   r   r   ssurgeon_state   s   



!


$r   c                  C   s   t  } | jdttjdd dd |  }|j}|tjd< tjdd u r/t	
d tddtjvr?tddd	}|tjd< t  t  d S )
Nz--CLASSPATH	CLASSPATHzPath to your CoreNLP directory.)typedefaultr,   zEProvide a valid $CLASSPATH value (path to your CoreNLP installation).rf   rG   ztokenize, pos, lemma, depparse)
processors)argparseArgumentParseradd_argumentstrosenvironget
parse_argsr   loggingrQ   
ValueErrorr   ri   r   rk   r   )parserargsr   en_nlp_stanzar   r   r   maini  s(   




r   __main__)T)*r   sys	streamlitr   streamlit.components.v1rW   v1.stanza.utils.visualization.ssurgeon_visualizerutilsvisualizationssurgeon_visualizerrw   r   -stanza.utils.visualization.semgrex_visualizerr   r   $stanza.utils.visualization.constantsstanza.utils.conllr   stanza.server.ssurgeonstanza.pipeline.corer   ior   typingr   r	   r
   r   r   r   r*   boolintr6   r8   r<   rc   rk   r   r   __name__r   r   r   r   <module>   sf    *		

l(j
