o
    h                  	   @   s  d Z ddlZddlZddlZddlZddlZddlZddlZddlZddl	Z	ddl
Z
ddlZddlZddlZddlZddlZddlmZ ddlmZ ddlmZ ddlmZmZmZmZ dZedZed	Zg d
ZdddddddddZ dd Z!g dZ"d2ddZ#G dd de$Z%G dd de%Z&G dd de$Z'G dd  d e$Z(G d!d" d"ej)Z*d#d$ Z+G d%d& d&e,Z-d3d'd(Z.G d)d* d*e-Z/d+d, Z0d3d-d.Z1d/d0 Z2g d1Z3dS )4z1
Client for accessing Stanford CoreNLP in Python
    N)datetime)Path)urlparse)DocumentparseFromDelimitedStringwriteToDelimitedStringto_textz)arunchaganty, kelvinguu, vzhong, wmonroe4stanzazcorenlp_server-(.*).props)ararabicchinesezhenglishenfrenchfrdegermanhu	hungarianititalianesspanishr   r   r   r   r   r   r   r   )r
   r   r   r   r   r   r   r   c                 C   s   |   tv S )z1 Check if a string references a CoreNLP language )lowerCORENLP_LANGS)	props_str r   O/var/www/html/env_mimamsha/lib/python3.10/site-packages/stanza/server/client.pyis_corenlp_lang2   s   r   )conllconllujson
serializedtextxml	inlinexmlc                 C   sh   |r|  tvrt| dt t| tkr.d| v r0| d   tvr2t| d  dt dS dS dS )z0 Do basic checks to validate CoreNLP properties z6 not a valid CoreNLP outputFormat value! Choose from: outputFormatN)r   CORENLP_OUTPUT_VALS
ValueErrortypedict
properties
annotatorsoutput_formatr   r   r   validate_corenlp_props;   s   r0   c                   @      e Zd ZdZdS )AnnotationExceptionzQ Exception raised when there was an error communicating with the CoreNLP server. N__name__
__module____qualname____doc__r   r   r   r   r2   E       r2   c                   @   r1   )TimeoutExceptionz5 Exception raised when the CoreNLP server timed out. Nr3   r   r   r   r   r9   J   r8   r9   c                   @   r1   )ShouldRetryExceptionz; Exception raised if the service should retry the request. Nr3   r   r   r   r   r:   O   r8   r:   c                   @   r1   )PermanentlyFailedExceptionz? Exception raised if the service should NOT retry the request. Nr3   r   r   r   r   r;   T   r8   r;   c                   @   s   e Zd ZdZdZdZdS )StartServerr         N)r4   r5   r6   
DONT_STARTFORCE_START	TRY_STARTr   r   r   r   r<   X   s    r<   c                 C   s<   | rt j| rtt j| rt |  d S d S d S d S N)ospathisfileSERVER_PROPS_TMP_FILE_PATTERNmatchbasenameremove)
props_filer   r   r   clean_props_file^   s
   rK   c                   @   sZ   e Zd ZdZdZ		dddZdd Zd	d
 Zdd Zdd Z	dd Z
dd Zdd ZdS )RobustServicez: Service that resuscitates itself if it is not available. x   NFc
           
      C   sf   |ot || _|ot || _|| _|| _|| _d | _d| _|| _	|| _
|| _|	| _t| j d S )NF)shlexsplit	start_cmdstop_cmdendpointstdoutstderrserver	is_activebe_quiethostportignore_binding_erroratexitregisteratexit_kill)
selfrP   rQ   rR   rS   rT   rW   rX   rY   rZ   r   r   r   __init__i   s   zRobustService.__init__c              
   C   s^   z| j s| jd ur| j d urW dS t| jd jW S  tjjy. } zt	|d }~ww )NFz/ping)
rZ   rU   pollrequestsgetrR   ok
exceptionsConnectionErrorr:   )r^   er   r   r   is_alivey   s   zRobustService.is_alivec                 C   sZ  | j r| jre| jretttjtjI}z|| j| jf W n5 tj	yU } z(| j
rItd| j d| j  d | _W Y d }~W d    d S td| j |d }~ww W d    n1 s`w   Y  | jrzttdrqtj}nttjd}|}n| j}| j}tdd| j   ztj| j ||d| _W d S  ty } ztd	|d }~ww d S )
Nz)Connecting to existing CoreNLP server at :zbError: unable to start the CoreNLP server on port %d (possibly something is already running there)DEVNULLwzStarting server with command:  )rT   rS   zWhen trying to run CoreNLP, a FileNotFoundError occurred, which frequently means Java was not installed or was not in the classpath.)rP   rX   rY   
contextlibclosingsocketAF_INETSOCK_STREAMbinderrorrZ   loggerinforU   r;   rW   hasattr
subprocessri   openrC   devnullrS   rT   joinPopenFileNotFoundError)r^   sockrf   rT   rS   r   r   r   start   sL   
	

zRobustService.startc                 C   s*   | j r| j  d u r| j   d S d S d S rB   )rU   r`   	terminater^   r   r   r   r]      s   zRobustService.atexit_killc                 C   s   | j r6| j   z| j d W n" tjy2   | j   z| j d W n
 tjy/   Y nw Y nw d | _ | jrAtj| jdd d| _d S )N   T)checkF)	rU   r~   waitrv   TimeoutExpiredkillrQ   runrV   r   r   r   r   stop   s"   


zRobustService.stopc                 C   s   |    | S rB   )r}   r   r   r   r   	__enter__   s   zRobustService.__enter__c                 C   s   |    d S rB   )r   )r^   ______r   r   r   __exit__   s   zRobustService.__exit__c                 C   s   | j rz|  rW d S |   W n	 ty   Y nw | jd u r#|   t }	 z|  r/W nW n	 ty9   Y nw t | | jk rItd nt	dq(d| _ d S )NTr=   z,Timed out waiting for service to come alive.)
rV   rg   r   r:   rU   r}   timeCHECK_ALIVE_TIMEOUTsleepr;   )r^   
start_timer   r   r   ensure_alive   s0   

zRobustService.ensure_alive)NNFNNF)r4   r5   r6   r7   r   r_   rg   r}   r]   r   r   r   r   r   r   r   r   rL   e   s    
 rL   c                 C   s   | dks| du rt dddkrt d} | S | du rAt dt jtt d} t j| s:td	t dt j| d} | S )a  
    Returns the classpath to use for corenlp.

    Prefers to use the given classpath parameter, if available.  If
    not, uses the CORENLP_HOME environment variable.  Resolves $CLASSPATH
    (the exact string) in either the classpath parameter or $CORENLP_HOME.
    z
$CLASSPATHNCORENLP_HOME	CLASSPATHstanza_corenlpzPlease install CoreNLP by running `stanza.install_corenlp()`. If you have installed it, please define $CORENLP_HOME to be location of your CoreNLP distribution or pass in a classpath parameter.  $CORENLP_HOME={}*)
rC   getenvrD   ry   strr   homeexistsr{   format)	classpathr   r   r   resolve_classpath   s    
	r   c                       s   e Zd ZdZdZdZdZdZdZdZ	e
jeeedd	dddded	e	d
df fdd	Zdd Zdd Zd#ddZd$ddZd%ddZd&ddZd&ddZdd Zd'ddZd%dd Zd(d!d"Z  ZS ))CoreNLPClientz* A client to the Stanford CoreNLP server. zhttp://localhost:9000i`  r   r#   5Gi NFTc                    s  || _ d | _d | _d | _d | _d | _t|||d || _|| _|| _	|| _
|   t|trGd| jj dd }t| |du rDtjntj}|tju sR|tju r)t | _|   t|jd\}}t|}|dksqJ dt|}d	| d
| d| d| d| d| d| d}|| _|| _|| _| jd ur|d| j 7 }| j	r|d7 }| jd urt|tkr| jnd |}|d| 7 }|rt|tkr|d7 }nt|t!kr|dd | 7 }nt|tkr|d| 7 }|d| j
 7 }dD ]}|"|d ur
|d| 7 }qdD ]}|"|d ur$|d| d|"| 7 }qd }nd  }}d  }}t#t$| j%||||	|
||||tjkd	 || _&d S )Nr,   z<Setting 'start_server' to a boolean value when constructing z is deprecated and will stopzv to function in a future version of stanza. Please consider switching to using a value from stanza.server.StartServer.Trh   	localhostz0If starting a server, endpoint must be localhostz	java -Xmxz -cp 'z9'  edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port z
 -timeout z
 -threads z -maxCharLength z -quiet rk   z -serverProperties z -preTokenized,z -annotators z	 -preloadz
 -preload z -outputFormat )sslstrictz -)status_port
uriContextkeyusernamepassword	blockList	server_id)rX   rY   rZ   )'start_serverserver_props_pathserver_start_timeserver_hostserver_portserver_classpathr0   r-   r.   pretokenizedr/   _setup_client_defaults
isinstancebool	__class__r4   rs   warningr<   r@   r?   rA   r   now_setup_server_defaultsr   netlocrO   intr   r*   r   ry   listrb   superr   r_   timeout)r^   r   rR   r   threadsr.   r   r/   r-   rS   rT   memoryrW   max_char_lengthpreloadr   kwargswarning_msgrX   rY   rP   annotators_strkwrQ   r   r   r   r_     s   






zCoreNLPClient.__init__c                 C   sn   | j durt| j tkr| j nd| j | _ | jdu r5t| jtkr/d| jv r/| jd | _dS tj| _dS dS )z
        Do some processing of annotators and output_format specified for the client.
        If interacting with an externally started server, these will be defaults for annotate() calls.
        :return: None
        Nr   r'   )	r.   r*   r   ry   r/   r-   r+   r   DEFAULT_OUTPUT_FORMATr   r   r   r   r   q  s   
"
z$CoreNLPClient._setup_client_defaultsc                 C   s2  | j du st| j tst| j ts| j durtd i | _ t| j tr[t| j rE| j  tv r6t| j  | _ t	d| j  d| j  d nt
j| j sUt| j  d | j | _dS t| j trt| j }| jdurp| j|d< | jdurt| jtr| j|d< t|}t	d	|  tt| || _dS dS )
a  
        Set up the default properties for the server.

        The properties argument can take on one of 3 value types

        1. File path on system or in CLASSPATH (e.g. /path/to/server.props or StanfordCoreNLP-french.properties
        2. Name of a Stanford CoreNLP supported language (e.g. french or fr)
        3. Python dictionary (properties written to tmp file for Java server, erased at end)

        In addition, an annotators list and output_format can be specified directly with arguments. These
        will overwrite any settings in the specified properties.

        If no properties are specified, the standard Stanford CoreNLP English server will be launched. The outputFormat
        will be set to 'serialized' and use the ProtobufAnnotationSerializer.
        NzLproperties passed invalid value (not a str or dict), setting properties = {}z&Using CoreNLP default properties for: z.  Make sure to have z^ models jar (available for download here: https://stanfordnlp.github.io/CoreNLP/) in CLASSPATHzN does not correspond to a file path. Make sure this file is in your CLASSPATH.r.   r'   z Writing properties to tmp file: )r-   r   r   r+   rs   r   r   r   LANGUAGE_SHORTHANDS_TO_FULLrt   rC   rD   rE   r   r.   r/   write_corenlp_propsr[   r\   rK   )r^   server_start_propertiestmp_pathr   r   r   r     s6   "








z$CoreNLPClient._setup_server_defaultsc           	   
   K   sT  | j tjur
|   z_|dd}|dkrd}n|dkrd}ntd| d|v rDd|v rDtj|d |d |d	< |	d |	d tj
| jft|t| d
|d|i| jd d d|}|  |W S  tjjy{ } ztdd}~w tjjy } z!|jdur|jjdurt|jj||jrt|jd |t |d}~ww )z
        Send a request to the CoreNLP server.

        :param (str | bytes) buf: data to be sent with the request
        :param (dict) properties: properties that the server expects
        :return: request result
        inputFormatr$   text/plain; charset=utf-8r#   application/x-protobufUnrecognized inputFormat r   r   auth)r-   resetDefaultcontent-typer>     paramsdataheadersr   z]Timeout requesting to CoreNLPServer. Maybe server is unavailable or your document is too longNr   )r   r<   r?   r   rb   r)   ra   r   HTTPBasicAuthpoppostrR   r   r   r   raise_for_statusrd   Timeoutr9   RequestExceptionresponser$   r2   args)	r^   bufr-   reset_defaultr   input_formatctyperrf   r   r   r   _request  sB   


zCoreNLPClient._requestc           
      K   sZ  t |||d i }| jdur| j|d< | jdur| j|d< t|tkr;t|r4d| i}|du r3d}ntd| t|tkrF|	| |duret|tksVt|t
kret|tkr^|nd||d< |durst|tkrs||d< |du ryd	}| j|d
||fi |}|d dkr| S |d dkrt }	t|	|j |	S |d dv r|jS |S )a  
        Send a request to the CoreNLP server.

        :param (str | unicode) text: raw text for the CoreNLPServer to parse
        :param (list | string) annotators: list of annotators to use
        :param (str) output_format: output type from server: serialized, json, text, conll, conllu, or xml
        :param (dict) properties: additional request properties (written on top of defaults)
        :param (bool) reset_default: don't use server defaults

        Precedence for settings:

        1. annotators and output_format args
        2. Values from properties dict
        3. Client defaults self.annotators and self.output_format (set during client construction)
        4. Server defaults

        Additional request parameters (apart from CoreNLP pipeline properties) such as 'username' and 'password'
        can be specified with the kwargs.

        :return: request result
        r,   Nr.   r'   pipelineLanguageTz Unrecognized properties keyword r   Futf-8r"   r#   )r$   r!   r    r%   )r0   r.   r/   r*   r   r   r   r)   r+   updater   ry   r   encoder"   r   r   contentr$   )
r^   r$   r.   r/   r-   r   r   request_propertiesr   docr   r   r   annotate  s>   




 zCoreNLPClient.annotatec                 C   s   |d u ri }| dddd |r t|tkr|nd||d< t }t|| | }W d    n1 s8w   Y  | ||}t	 }t
||j |S )Nr#   6edu.stanford.nlp.pipeline.ProtobufAnnotationSerializer)r   r'   
serializerr   r.   )r   r*   r   ry   ioBytesIOr   getvaluer   r   r   r   )r^   r   r.   r-   streammsgr   r   r   r   r   $  s"   


zCoreNLPClient.updatec                 C   $   |  d|||||}|rt|}|S )Nz/tokensregex_CoreNLPClient__regexregex_matches_to_indexed_wordsr^   r$   patternfilterto_wordsr.   r-   matchesr   r   r   tokensregex7  s   zCoreNLPClient.tokensregexc                 C   r   )Nz/semgrexr   r   r   r   r   semgrex>  s   zCoreNLPClient.semgrexc                 C   s4   |j r|j |_|jD ]}|j }| || q
d S rB   )labelvaluechildrenchildaddfill_tree_proto)r^   tree
proto_treer  proto_childr   r   r   r  D  s   

zCoreNLPClient.fill_tree_protoc                 C   sb  |d u r!| j d ur!t| j tsJ | j d}d|vr | j d }nd}|d u r+td|d ur|d u r5i }d|d< d|vrAd	|d< t }g }	t|D ]9\}
}|j }|
|_	t
|	|_| }|	| t
|	|_| ||j |D ]}|j }||_||_d
|_qrqJd
|	|_t }t|| | }W d    n1 sw   Y  | d|||||S )Nr   parsez,parseztokenize,ssplit,pos,parsez(Cannot have None as a pattern for tregexr#   r   r   r   rk   z/tregex)r.   r   r   rO   r)   r   	enumeratesentencer  sentenceIndexlentokenOffsetBeginleaf_labelsextendtokenOffsetEndr  	parseTreetokenr   wordafterry   r$   r   r   r   r   r   )r^   r$   r   r   r.   r-   treespiecesr   	full_texttree_idxr  r	  leavesr  r  r   r   r   r   tregexK  sJ   








zCoreNLPClient.tregexc              
   C   sV  | j tjur
|   |du ri }|ddd |r(t|tr$d|n||d< d|d< d	|d
< zQ|dd}|dkr>d}n|dkrEd}nt	d| t
j| j| ||t|dt|trb|dn|d|i| jd d d}	|	  |	jdu r{d|	_t|	jW S  t
jy }
 z|	jdrt|	jt|	jd}
~
w tjy   t|	jw )a  
        Send a regex-related request to the CoreNLP server.

        :param (str | unicode) path: the path for the regex endpoint
        :param text: raw text for the CoreNLPServer to apply the regex
        :param (str | unicode) pattern: regex pattern
        :param (bool) filter: option to filter sentences that contain matches, if false returns matches
        :param properties: option to filter sentences that contain matches, if false returns matches
        :return: request result
        Nr$   r   )r   r   r   r.   r"   r'   trueztokenize.codepointr   r   r#   r   r   )r   r   r-   r   r   r>   r   r   r   )r   r<   r?   r   r   r   r   ry   rb   r)   ra   r   rR   r   r   r   r   encodingr"   loadsr$   	HTTPError
startswithr9   r2   JSONDecodeError)r^   rD   r$   r   r   r.   r-   r   r   r   rf   r   r   r   __regexz  sR   




zCoreNLPClient.__regexc              
   C   s   | j tjur
|   |du ri }d|d< d}d|d< z5tj| jd dt|it|tr0|	d	n|d
|i| j
d d d}|  |jdu rId	|_t|jW S  tjyl } z|jdrct|jt|jd}~w tjyx   t|jw )z
        Send a request to the server which processes the text using SceneGraph

        This will require a new CoreNLP release, 4.5.5 or later
        Nr$   r   r   r"   r'   z/scenegraphr-   r   r   r>   r   r   r   )r   r<   r?   r   ra   r   rR   r   r   r   r   r   r  r"   r  r$   r  r  r9   r2   r  )r^   r$   r-   r   r   rf   r   r   r   
scenegraph  s8   	



zCoreNLPClient.scenegraph)F)NNNN)NN)FFNN)NNFNNNrB   )r4   r5   r6   r7   DEFAULT_ENDPOINTDEFAULT_TIMEOUTDEFAULT_THREADSr   DEFAULT_MEMORYDEFAULT_MAX_CHAR_LENGTHr<   r@   r_   r   r   r   r   r   r   r   r  r  r   r!  __classcell__r   r   r   r   r      sF    p
3
'
H



/>r   c                 C   s   i }t | 3}dd | dD }|D ]}|dd }t|d }||d }||| < qW d   |S 1 s<w   Y  |S )z5 Read a Stanford CoreNLP properties file into a dict c                 S   s"   g | ]}|  r|d s|qS )#)stripr  ).0
entry_liner   r   r   
<listcomp>  s
    
z&read_corenlp_props.<locals>.<listcomp>
=r   N)rw   readrO   r  r)  )
props_path
props_dictrJ   entry_linesr+  kk_lenvr   r   r   read_corenlp_props  s   

r6  c                 C   s   |du rdt  jdd  d}t|sJ t|d*}|  D ]\}}t|tr1d	|}n|}|
| d| d q"W d   |S 1 sJw   Y  |S )	z4 Write a Stanford CoreNLP properties dict to a file Nzcorenlp_server-   z.propsrj   r   z = z

)uuiduuid4hexrF   rG   rw   itemsr   r   ry   write)r1  	file_pathrJ   r3  r5  writeable_vr   r   r   r     s   

r   c                 C   s   dd t | d D }|S )z
    Transforms tokensregex and semgrex matches to indexed words.
    :param matches: unprocessed regex matches
    :return: flat array of indexed words
    c              	   S   sD   g | ]\}}|  D ]\}}|d kr
t|fi td|fgq
qS )lengthr	  )r;  r+   )r*  isr3  r5  r   r   r   r,    s    z2regex_matches_to_indexed_words.<locals>.<listcomp>	sentences)r  )r   wordsr   r   r   r     s   
r   )r   r2   r9   r   )NNNrB   )4r7   r[   rl   enumr   rC   rera   loggingr"   rN   rn   rv   r   sysr8  r   pathlibr   urllib.parser   stanza.protobufr   r   r   r   
__author__	getLoggerrs   compilerF   r   r   r   r(   r0   	Exceptionr2   r9   r:   r;   Enumr<   rK   objectrL   r   r   r6  r   r   __all__r   r   r   r   <module>   sh    




}   m
