o
    hb                     @   sb   d dl Z d dlmZ d dlZd dlmZ d dlmZ dd Zdd Z	d	d
 Z
edkr/e
  dS dS )    N)defaultdict)Document)list_doc_entitiesc                  C   s*   t jdd} | jdtddd |  }|S )Nz/Report the coverage of one NER file on another.)descriptionfilename+zFile(s) to count)typenargshelp)argparseArgumentParseradd_argumentstr
parse_args)parserargs r   c/var/www/html/env_mimamsha/lib/python3.10/site-packages/stanza/utils/datasets/ner/count_entities.pyr   	   s   r   c            
   	   G   s   t t}| D ]C}t|%}tt|}tdd |jD }td||f  t	|}W d    n1 s4w   Y  |D ]}||d  
|d  q;qt| }|D ]}	t|	t||	  qRd S )Nc                 s   s     | ]}|j D ]}d V  qqdS )   N)tokens).0sentencetokenr   r   r   	<genexpr>   s    z!count_entities.<locals>.<genexpr>zNumber of tokens in %s: %dr   r   )r   listopenr   jsonloadsum	sentencesprintr   appendsortedkeyslen)
	filenamesentity_collectionr   findoc
num_tokensentitiesentr#   kr   r   r   count_entities   s   

r-   c                  C   s   t  } t| j  d S )N)r   r-   r   )r   r   r   r   main!   s   r.   __main__)r   collectionsr   r   stanza.models.common.docr   stanza.utils.datasets.ner.utilsr   r   r-   r.   __name__r   r   r   r   <module>   s   
