o
    hX/                  	   @   s   U d Z ddlmZmZmZmZmZmZ ddlm	Z	 g dZ
dZi Zeeef ed< G dd dZG d	d
 d
eZG dd deZG dd deZdededee fddZdee dededee fddZdeeef fddZdS )z]
emoji.tokenizer
~~~~~~~~~~~~~~~

Components for detecting and tokenizing emoji in strings.

    )List
NamedTupleDictUnionIteratorAny)unicode_codes)
EmojiMatchEmojiMatchZWJEmojiMatchZWJNonRGITokentokenizefilter_tokensu   ‍_SEARCH_TREEc                   @   s   e Zd ZdZdZdedededeeee	f df fdd	Z
d
eee	f fddZd
efddZd
ed fddZd
efddZdS )r	   zd
    Represents a match of a "recommended for general interchange" (RGI)
    emoji in a string.
    emojistartenddatar   r   r   r   Nc                 C   s"   || _ 	 || _	 || _	 || _d S Nr   )selfr   r   r   r    r   J/var/www/html/env_mimamsha/lib/python3.10/site-packages/emoji/tokenizer.py__init__"   s   zEmojiMatch.__init__returnc                 C   s6   | j r| j  }| j|d< | j|d< |S | j| jdS )z
        Returns a copy of the data from :data:`EMOJI_DATA` for this match
        with the additional keys ``match_start`` and ``match_end``.
        match_start	match_end)r   r   )r   copyr   r   )r   emj_datar   r   r   	data_copy1   s   


zEmojiMatch.data_copyc                 C   s
   t | jv S )zp
        Checks if this is a ZWJ-emoji.

        :returns: True if this is a ZWJ-emoji, False otherwise
        )_ZWJr   r   r   r   r   is_zwj>   s   
zEmojiMatch.is_zwj)r
   r	   c                 C   s   |   rt| S | S )z
        Splits a ZWJ-emoji into its constituents.

        :returns: An :class:`EmojiMatchZWJ` containing the "sub-emoji" if this is a ZWJ-emoji, otherwise self
        )r"   r
   r!   r   r   r   splitG   s   zEmojiMatch.splitc                 C   s&   | j j d| j d| j d| j dS N(z, :))	__class____name__r   r   r   r!   r   r   r   __repr__S   s   &zEmojiMatch.__repr__)r)   
__module____qualname____doc__	__slots__strintr   r   r   r   r   boolr"   r#   r*   r   r   r   r   r	      s     
	r	   c                       s^   e Zd ZdZdZdef fddZdefddZde	fd	d
Z
dddZdefddZ  ZS )r
   zr
    Represents a match of multiple emoji in a string that were joined by
    zero-width-joiners (ZWJ/``\u200D``).)emojismatchc              	      sz   t  |j|j|j|j g | _	 |j}|jtD ] }t	|||t
| tj|d }| j| |t
|d 7 }qd S )N   )superr   r   r   r   r   r2   r#   r    r	   lenr   
EMOJI_DATAgetappend)r   r3   iemr(   r   r   r   ^   s    zEmojiMatchZWJ.__init__r   c                 C   s   t dd | jD S )z1
        Joins a ZWJ-emoji into a string
        c                 s       | ]}|j V  qd S r   r   .0r;   r   r   r   	<genexpr>o       z%EmojiMatchZWJ.join.<locals>.<genexpr>)r    joinr2   r!   r   r   r   rD   j   s   zEmojiMatchZWJ.joinc                 C   s   dS )NTr   r!   r   r   r   r"   q      zEmojiMatchZWJ.is_zwjc                 C   s   | S r   r   r!   r   r   r   r#   t   rE   zEmojiMatchZWJ.splitc                 C   s(   | j j d|   d| j d| j dS r$   )r(   r)   rD   r   r   r!   r   r   r   r*   w   s   (zEmojiMatchZWJ.__repr__)r   r
   )r)   r+   r,   r-   r.   r	   r   r/   rD   r1   r"   r#   r*   __classcell__r   r   r=   r   r
   W   s    
r
   c                   @   s8   e Zd ZdZdedefddZdd Zdefd	d
ZdS )r   a  
    Represents a match of multiple emoji in a string that were joined by
    zero-width-joiners (ZWJ/``\u200D``). This class is only used for emoji
    that are not "recommended for general interchange" (non-RGI) by Unicode.org.
    The data property of this class is always None.
    first_emoji_matchsecond_emoji_matchc                 C   s   ||g| _ 	 |   d S r   )r2   _update)r   rG   rH   r   r   r   r      s   
zEmojiMatchZWJNonRGI.__init__c                 C   s>   t dd | jD | _| jd j| _| jd j| _d | _d S )Nc                 s   r>   r   r?   r@   r   r   r   rB      rC   z.EmojiMatchZWJNonRGI._update.<locals>.<genexpr>r   )r    rD   r2   r   r   r   r   r!   r   r   r   rI      s   
zEmojiMatchZWJNonRGI._updatenext_emoji_matchc                 C   s   | j | |   d S r   )r2   r9   rI   )r   rK   r   r   r   _add   s   zEmojiMatchZWJNonRGI._addN)r)   r+   r,   r-   r	   r   rI   rL   r   r   r   r   r   {   s
    r   c                   @   s*   e Zd ZU dZeed< eeef ed< dS )r   z
    A named tuple containing the matched string and its :class:`EmojiMatch` object if it is an emoji
    or a single character that is not a unicode emoji.
    charsvalueN)r)   r+   r,   r-   r/   __annotations__r   r	   r   r   r   r   r      s   
 r   stringkeep_zwjr   c                 c   s   t  }tj}g }d}t| }g }||k rd}| | }	||v r3|d7 }|	tkr2|r2|t|	|	 q|	|v r|d }
||	 }|
|k rb| |
 |v rb|
|v rNn|| |
  }|
d7 }
|
|k rb| |
 |v sId|v r|d }| ||
 }t|||
|}|
d }d}|t|| ne|	tkr|r|d j|v r|dkr| |d  |v r|| ||d j d tj	d kr|t
d	d
 |dd D  }| | tkr|d7 }|d= n|dd= n|t|d j }|d= q|r|E dH  g }|s|	dkr|	dkr|t|	|	 |d7 }||k s|E dH  dS )a  
    Finds unicode emoji in a string. Yields all normal characters as a named
    tuple :class:`Token` ``(char, char)`` and all emoji as :class:`Token` ``(chars, EmojiMatch)``.

    :param string: String contains unicode characters. MUST BE UNICODE.
    :param keep_zwj: Should ZWJ-characters (``\u200D``) that join non-RGI emoji be
        skipped or should be yielded as normal characters
    :return: An iterable of tuples :class:`Token` ``(char, char)`` or :class:`Token` ``(chars, EmojiMatch)``
    r   Fr4   r   TrJ   status	componentc                 s   s    | ]}t |jV  qd S r   )r6   rM   )rA   tr   r   r   rB      s    ztokenize.<locals>.<genexpr>Nu   ︎u   ️)get_search_treer   r7   r6   r    r9   r   r	   rM   STATUSsum)rP   rQ   treer7   resultr:   lengthignoreconsumedcharjsub_treer   code_points	match_objr   r   r   r      sv   


Dr   matches
emoji_only
join_emojic           	      c   sJ   |s|s| E dH  dS |s| D ]
}|j tkr|V  qdS d}d}d}g }| D ]v}|}|r5|jtkr5d}q't|jtr|r|rt|d jtrc|d j|j t|d j t |j  |d j|d< n%| }t|jtsoJ |	t|j t |j  t|j|j n|	| d}d}q'd}d}|E dH  |s|V  g }q'|E dH  dS )ap  
    Filters the output of `tokenize()`

    :param matches: An iterable of tuples of the form ``(match_str, result)``
        where ``result`` is either an EmojiMatch or a string.
    :param emoji_only: If True, only EmojiMatch are returned in the output.
        If False all characters are returned
    :param join_emoji: If True, multiple EmojiMatch are merged into
        a single :class:`EmojiMatchZWJNonRGI` if they are separated only by a ZWJ.

    :return: An iterable of tuples :class:`Token` ``(char, char)``,
        :class:`Token` ``(chars, EmojiMatch)`` or :class:`Token` ``(chars, EmojiMatchZWJNonRGI)``
    NFTrJ   )
rM   r    rN   
isinstancer	   r   rL   r   popr9   )	rc   rd   re   tokenprevious_is_emojiprevious_is_zwjpre_previous_is_emojiaccumulatorprevr   r   r   r      sX   




r   c                  C   sd   t s0tjD ]*} t }t| d }t| D ]\}}||vri ||< || }||kr.tj|  |d< qqt S )u  
    Generate a search tree for demojize().
    Example of a search tree::

        EMOJI_DATA =
        {'a': {'en': ':Apple:'},
        'b': {'en': ':Bus:'},
        'ba': {'en': ':Bat:'},
        'band': {'en': ':Beatles:'},
        'bandit': {'en': ':Outlaw:'},
        'bank': {'en': ':BankOfEngland:'},
        'bb': {'en': ':BB-gun:'},
        'c': {'en': ':Car:'}}

        _SEARCH_TREE =
        {'a': {'data': {'en': ':Apple:'}},
        'b': {'a': {'data': {'en': ':Bat:'},
                    'n': {'d': {'data': {'en': ':Beatles:'},
                                'i': {'t': {'data': {'en': ':Outlaw:'}}}},
                        'k': {'data': {'en': ':BankOfEngland:'}}}},
            'b': {'data': {'en': ':BB-gun:'}},
            'data': {'en': ':Bus:'}},
        'c': {'data': {'en': ':Car:'}}}

                   _SEARCH_TREE
                 /     |        ⧵
               /       |          ⧵
            a          b             c
            |        / |  ⧵          |
            |       /  |    ⧵        |
        :Apple:   ba  :Bus:  bb     :Car:
                 /  ⧵         |
                /    ⧵        |
              :Bat:    ban     :BB-gun:
                     /     ⧵
                    /       ⧵
                 band       bank
                /   ⧵         |
               /     ⧵        |
            bandi :Beatles:  :BankOfEngland:
               |
            bandit
               |
           :Outlaw:


    r4   r   )r   r   r7   r6   	enumerate)emjr`   lastidxr:   r^   r   r   r   rV   >  s   0
rV   N)r-   typingr   r   r   r   r   r   r   r   __all__r    r   r/   rO   r	   r
   r   r   r1   r   r   rV   r   r   r   r   <module>   s*     	=$
[
E