o
    hf                     @   s   d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
T ejjejjgZdd Zd dd	Zd
d Zdd Zdd Zdd Zdd Zdd Zdd Zd Zdd Zd Zdd Zd Zdd ZdS )!    N)pretrain)UNK_ID)*c                 C   s8   t | dksJ d| v sJ d| v sJ d| v sJ d S )N   unbanmoxopal)len)vocab r   \/var/www/html/env_mimamsha/lib/python3.10/site-packages/stanza/tests/common/test_pretrain.pycheck_vocab   s   r   Fc              
   C   sR   t g dg dg dg dg dg dg dg}|r d|t< t j| | d S )N)        r   r   r   )g      ?g       @g      @g      @)g      @g      @g      @g       @)g      "@g      $@g      &@g      (@)nparrayr   testingassert_allclose)embunkexpectedr   r   r   check_embedding   s   
r   c                 C   s   t | j t| j d S )N)r   r
   r   r   ptr   r   r   check_pretrain!   s   
r   c                  C       t jt ddd} t|  d S )Nz/in/tiny_emb.txtFvec_filenamesave_to_filer   PretrainTEST_WORKING_DIRr   r   r   r   r   test_text_pretrain%      r"   c                  C   r   )N/in/tiny_emb.xzFr   r   r   r   r   r   test_xz_pretrain)   r#   r%   c                  C   r   )Nz/in/tiny_emb.gzFr   r   r   r   r   r   test_gz_pretrain-   r#   r&   c                  C   r   )Nz/in/tiny_emb.zipFr   r   r   r   r   r   test_zip_pretrain1   r#   r'   c                  C   r   )Nz/in/tiny_emb.csvF)csv_filenamer   r   r   r   r   r   test_csv_pretrain5   r#   r)   c               	   C   s   t jt dddd} z6|   tj| jt dd}t| tj| jdd}t| tj	| jdd	}t
|d
  W t| j dS t| j w )zH
    Test saving a pretrain and then loading from the existing file
    /outz.ptFdirsuffixdeleter$   )filenamer   unban_mox_opalT)weights_onlyr   N)tempfileNamedTemporaryFiler!   closer   r    namer   torchloadr   osunlink)test_pt_filer   pt2pt3r   r   r   test_resave_pretrain9   s   r=   z3
3 4
unban mox 1 2 3 4
opal 5 6 7 8
foo 9 10 11 12
c               	   C   s   t jt dddd} z/| t  |   tj| j	dd}t
|j d|jv s+J d|jv s2J W t| j	 dS t| j	 w )	z
    Test reading a pretrain with an ascii space in it

    The vocab word with a space in it should have the correct number
    of dimensions read, with the space converted to nbsp
    r*   z.txtFr+   r   u
   unban moxz	unban moxN)r2   r3   r!   writeSPACE_PRETRAINencoder4   r   r    r5   r   r   r
   r8   r9   )test_txt_filer   r   r   r   test_whitespaceV   s   
rB   z+
unban 1 2 3 4
mox 5 6 7 8
opal 9 10 11 12
c               	   C   s   t jtd8} tj| d}t|ddd}|t W d   n1 s%w   Y  t	j
|dd}t|j W d   dS 1 sAw   Y  dS )	z;
    Check loading a pretrain with no rows,cols header
    r,   tiny.txtwutf-8encodingNFr   )r2   TemporaryDirectoryr!   r8   pathjoinopenr>   NO_HEADER_PRETRAINr   r    r   r   tmpdirr/   foutr   r   r   r   test_no_headerp   s   "rQ   z=
unban 1 2 3 4
mox 5 6 7 8
opal 9 10 11 12
<unk> -1 -1 -1 -1
c               	   C   s   t jtd:} tj| d}t|ddd}|t W d   n1 s%w   Y  t	j
|dd}t|jd	d
 W d   dS 1 sCw   Y  dS )zI
    Check loading a pretrain with <unk> at the end, like GloVe does
    rC   rD   rE   rF   rG   NFr   T)r   )r2   rI   r!   r8   rJ   rK   rL   r>   UNK_PRETRAINr   r    r   r   rN   r   r   r   rQ      s   ")F)r8   r2   pytestnumpyr   r6   stanza.models.commonr   stanza.models.common.vocabr   stanza.testsmarktravispipeline
pytestmarkr   r   r   r"   r%   r&   r'   r)   r=   stripr?   rB   rM   rQ   rR   r   r   r   r   <module>   s<    
