o
    h&                     @   s   d Z ddlmZ ddlmZ ddlmZ ddlmZ ddlZddlZddl	Z	dddZ
dd	d
ZdddZdddZdd ZedkrGe  dS dS )zT
Visualize named entities from different texts and Stanza documents (+ CoNLL files)
    )displacy)Doc)Span)is_right_to_leftNc                 C   st  t dg t|}}}| jt|d}}}	|r?t| j}|r?|D ]}
|	|
vr>||
 }||
 |||	|
ddd  < q$|D ]}g g d}}}t|j	D ]q\}}|r|j
 r|s|j
ddd g}|d }|t|j	d kr|j	| j
 r||j	| j
ddd  |d7 }|t|j	d kr|j	| j
 s{t|}|D ]}|| qd}qP|r|j
 r|rqP||j
 d}qPt|j|d}|jD ]G}|r|j|vrqg }|jD ]}||jd	 d  q|st||d	 |d d |j}nt||d	 |d d |	|jddd  }|| q|| || qAd
|i}|r*||d< |D ]}tj|d|d q,dS )a;  
    Takes a stanza doc object and language pipeline and visualizes the named entities within it.

    Stanza currently supports a limited amount of languages for NER, which you can view here:
    https://stanfordnlp.github.io/stanza/ner_models.html

    To view only a specific type(s) of named entities, set the optional 'select' argument to
    a list of the named entity types. Ex: select=["PER", "ORG", "GPE"] to only see entities tagged as Person(s),
    Organizations, and Geo-political entities. A full list of the available types can be found here:
    https://stanfordnlp.github.io/stanza/ner_models.html (ctrl + F "The following table").

    The colors argument is formatted as a dictionary of NER tags with their corresponding colors, which can be
    represented as a string (ex: "blue"), a color hex value (ex: #aa9cfc), or as a linear gradient of color
    values (ex: "linear-gradient(90deg, #aa9cfc, #fc9ce7)").

    Do not change the 'rtl_clr_adjusted' argument; it is used for ensuring that the visualize_strings function
    works properly on rtl languages.
    enu   ‮NF   T)wordsr   entscolorsent)styleoptions)spacyblankcopydeepcopy	sentencesr   reversedpop	enumerater	   textisasciilenappendr   vocabr
   typetokensidr   set_entsr   render)doclanguageselectr   model	documentsvisualization_colorsr   rtlRTL_OVERRIDEcolorclr_valsentencer	   display_entsalready_foundiword	to_appendnext_word_indextokendocumentr   found_indexesto_addvisualization_options r7   g/var/www/html/env_mimamsha/lib/python3.10/site-packages/stanza/utils/visualization/ner_visualization.pyvisualize_ner_doc   s`   

""

*
r9   c                 C   s   || }t ||j|| dS )a  
    Takes in a text string and visualizes the named entities within the text.

    Required args also include a pipeline code, the two-letter code for a language defined by Universal Dependencies (ex: "en" for English).

    Lastly, the user must provide an NLP pipeline - we recommend Stanza (ex: pipe = stanza.Pipeline('en')).

    Optionally, the 'select' argument allows for specific NER tags to be highlighted; the 'color' argument allows
    for specific NER tags to have certain color(s).
    N)r9   lang)r   piper#   r   r!   r7   r7   r8   visualize_ner_strZ   s   r<   c                 C   s,   t j|dd}| D ]
}t||||d q	dS )a  
    Takes in a list of strings and a language code (Stanza defines these, ex: 'en' for English) to visualize all
    of the strings' named entities.

    The strings are processed by the Stanza pipeline and the named entities are displayed. Each text is separated by a delimiting line.

    Optionally, the 'select' argument may be configured to only visualize given named entities (ex: select=['ORG', 'PERSON']).

    The optional colors argument is formatted as a dictionary of NER tags with their corresponding colors, which can be
    represented as a string (ex: "blue"), a color hex value (ex: #aa9cfc), or as a linear gradient of color
    values (ex: "linear-gradient(90deg, #aa9cfc, #fc9ce7)").
    ztokenize,ner)
processorsr#   r   N)stanzaPipeliner<   )textslanguage_coder#   r   	lang_piper   r7   r7   r8   visualize_stringsi   s   rD   c                 C   s   | D ]
}t ||||d qdS )aj  
    Takes in a list of doc and a language code (Stanza defines these, ex: 'en' for English) to visualize all
    of the strings' named entities.

    Each text is separated by a delimiting line.

    Optionally, the 'select' argument may be configured to only visualize given named entities (ex: select=['ORG', 'PERSON']).

    The optional colors argument is formatted as a dictionary of NER tags with their corresponding colors, which can be
    represented as a string (ex: "blue"), a color hex value (ex: #aa9cfc), or as a linear gradient of color
    values (ex: "linear-gradient(90deg, #aa9cfc, #fc9ce7)").
    r>   N)r9   )docsrB   r#   r   r!   r7   r7   r8   visualize_docs|   s   rF   c                  C   sd   ddg} ddg}g d}t | d t |ddd	d
dd t |dddgd t |dddddd d S )Na  Samuel Jackson, a Christian man from Utah, went to the JFK Airport for a flight to New York.
                               He was thinking of attending the US Open, his favorite tennis tournament besides Wimbledon.
                               That would be a dream trip, certainly not possible since it is $5000 attendance and 5000 miles away.
                               On the way there, he watched the Super Bowl for 2 hours and read War and Piece by Tolstoy for 1 hour.
                               In New York, he crossed the Brooklyn Bridge and listened to the 5th symphony of Beethoven as well as
                               "All I want for Christmas is You" by Mariah Carey.zVBarack Obama was born in Hawaii. He was elected President of the United States in 2008u  来自犹他州的基督徒塞缪尔杰克逊前往肯尼迪机场搭乘航班飞往纽约。
                             他正在考虑参加美国公开赛，这是除了温布尔登之外他最喜欢的网球赛事。
                             那将是一次梦想之旅，当然不可能，因为它的出勤费为 5000 美元，距离 5000 英里。
                             在去的路上，他看了 2 个小时的超级碗比赛，看了 1 个小时的托尔斯泰的《战争与碎片》。
                               在纽约，他穿过布鲁克林大桥，聆听了贝多芬的第五交响曲以及 玛丽亚凯莉的“圣诞节我想要的就是你”。u8   我觉得罗家费德勒住在加州, 在美国里面。)u   .أعيش في سان فرانسيسكو ، كاليفورنيا. اسمي أليكس وأنا ألتحق بجامعة ستانفورد. أنا أدرس علوم الكمبيوتر وأستاذي هو كريس مانينغuC   اسمي أليكس ، أنا من الولايات المتحدة.u\  صامويل جاكسون ، رجل مسيحي من ولاية يوتا ، ذهب إلى مطار جون كنيدي في رحلة إلى نيويورك. كان يفكر في حضور بطولة الولايات المتحدة المفتوحة للتنس ، بطولة التنس المفضلة لديه إلى جانب بطولة ويمبلدون. ستكون هذه رحلة الأحلام ، وبالتأكيد ليست ممكنة لأنها تبلغ 5000 دولار للحضور و 5000 ميل. في الطريق إلى هناك ، شاهد Super Bowl لمدة ساعتين وقرأ War and Piece by Tolstoy لمدة ساعة واحدة. في نيويورك ، عبر جسر بروكلين واستمع إلى السيمفونية الخامسة لبيتهوفن وكذلك "كل ما أريده في عيد الميلاد هو أنت" لماريا كاري.r   zhyellowredblue)PERSONDATEGPE)r   rK   rL   )r#   arpinkz(linear-gradient(90deg, #aa9cfc, #fc9ce7))PERLOCORG)rD   )
en_strings
zh_strings
ar_stringsr7   r7   r8   main   s   


rV   __main__)NN)__doc__r   r   spacy.tokensr   r   stanza.models.common.constantr   r?   r   r9   r<   rD   rF   rV   __name__r7   r7   r7   r8   <module>   s     

L


