U
    <Af                     @   s   d dl Z d dlZd dlZd dlmZ d dlmZmZmZm	Z	 d dl
Zd dlZd dlmZ d dlmZ ertd dlmZ eeZG dd deZdS )	    N)nullcontext)TYPE_CHECKINGDictListOptional)SentenceEvaluator)pytorch_cos_sim)SentenceTransformerc                
       sb   e Zd ZdZdee ee eeeeeee d fdd	Z	ddeeee
eef dddZ  ZS )TranslationEvaluatora  
    Given two sets of sentences in different languages, e.g. (en_1, en_2, en_3...) and (fr_1, fr_2, fr_3, ...),
    and assuming that fr_i is the translation of en_i.
    Checks if vec(en_i) has the highest similarity to vec(fr_i). Computes the accuracy in both directions

    Example:
        ::

            from sentence_transformers import SentenceTransformer
            from sentence_transformers.evaluation import TranslationEvaluator
            from datasets import load_dataset

            # Load a model
            model = SentenceTransformer('paraphrase-multilingual-mpnet-base-v2')

            # Load a parallel sentences dataset
            dataset = load_dataset("sentence-transformers/parallel-sentences-news-commentary", "en-nl", split="train[:1000]")

            # Initialize the TranslationEvaluator using the same texts from two languages
            translation_evaluator = TranslationEvaluator(
                source_sentences=dataset["english"],
                target_sentences=dataset["non_english"],
                name="news-commentary-en-nl",
            )
            results = translation_evaluator(model)
            '''
            Evaluating translation matching Accuracy of the model on the news-commentary-en-nl dataset:
            Accuracy src2trg: 90.80
            Accuracy trg2src: 90.40
            '''
            print(translation_evaluator.primary_metric)
            # => "news-commentary-en-nl_mean_accuracy"
            print(results[translation_evaluator.primary_metric])
            # => 0.906
    F    TN)source_sentencestarget_sentencesshow_progress_bar
batch_sizenameprint_wrong_matches	write_csvtruncate_dimc	           	         s   t    || _|| _|| _|| _|| _|| _|| _t	| jt	| jksLt
|rXd| }d| d | _ddddg| _|| _d| _d	S )
a  
        Constructs an evaluator based for the dataset

        The labels need to indicate the similarity between the sentences.

        Args:
            source_sentences (List[str]): List of sentences in the source language.
            target_sentences (List[str]): List of sentences in the target language.
            show_progress_bar (bool): Whether to show a progress bar when computing embeddings. Defaults to False.
            batch_size (int): The batch size to compute sentence embeddings. Defaults to 16.
            name (str): The name of the evaluator. Defaults to an empty string.
            print_wrong_matches (bool): Whether to print incorrect matches. Defaults to False.
            write_csv (bool): Whether to write the evaluation results to a CSV file. Defaults to True.
            truncate_dim (int, optional): The dimension to truncate sentence embeddings to. If None, the model's
                current truncation dimension will be used. Defaults to None.
        _Ztranslation_evaluationz_results.csvepochstepsZsrc2trgZtrg2srcmean_accuracyN)super__init__r   r   r   r   r   r   r   lenAssertionErrorcsv_filecsv_headersr   Zprimary_metric)	selfr   r   r   r   r   r   r   r   	__class__ Y/tmp/pip-unpacked-wheel-i7fohqg6/sentence_transformers/evaluation/TranslationEvaluator.pyr   8   s    
zTranslationEvaluator.__init__r	   )modeloutput_pathr   r   returnc              	   C   s  |dkr0|dkrd| }q4d| d| d}nd}| j d k	rP|d| j  d7 }td	| j d
| d | j d krzt n
|| j B t|j| j	| j
| jdd}t|j| j| j
| jdd}W 5 Q R X t||   }d}	d}
tt|D ]}t|| }||kr|	d7 }	q| jrtd|d|d| td| j	|  td| j| d|| | dd td| j| d|| | dd t|| }t|dd dd}|d d D ](\}}td|d|dd| j|  qq|j}tt|D ]&}t|| }||kr|
d7 }
q|	t| }|
t| }td|d  td |d  |d k	r| jrtj|| j}tj |}t!|d|rd!nd"d#d$4}t"#|}|s|$| j% |$||||g W 5 Q R X |||| d% d&}| &|| j}| '|| |S )'Nr$   z after epoch z
 in epoch z after z stepsr   z (truncated to )z=Evaluating translation matching Accuracy of the model on the z dataset:F)r   r   Zconvert_to_numpyr      z
Incorrect  : Sourcezis most similar to targetzinstead of targetzSource     :zPred Target:z(Score: z.4fzTrue Target:c                 S   s   | d S )Nr*   r"   )xr"   r"   r#   <lambda>       z/TranslationEvaluator.__call__.<locals>.<lambda>T)keyreverse   	zAccuracy src2trg: {:.2f}d   zAccuracy trg2src: {:.2f}awzutf-8)newlinemodeencoding   )Zsrc2trg_accuracyZtrg2src_accuracyr   )(r   loggerinfor   r   Ztruncate_sentence_embeddingstorchstackencoder   r   r   r   r   detachcpunumpyranger   npZargmaxr   print	enumeratesortedTformatr   ospathjoinr   isfileopencsvwriterwriterowr   Zprefix_name_to_metricsZ store_metrics_in_model_card_data)r   r%   r&   r   r   Zout_txtZembeddings1Zembeddings2Zcos_simsZcorrect_src2trgZcorrect_trg2srciZmax_idxresultsidxZscoreZacc_src2trgZacc_trg2srcZcsv_pathZoutput_file_existsfrN   Zmetricsr"   r"   r#   __call__f   s    
	

$$$


zTranslationEvaluator.__call__)Fr   r   FTN)Nr$   r$   )__name__
__module____qualname____doc__r   strboolintr   r   r   floatrT   __classcell__r"   r"   r    r#   r
      s6   (      /        
r
   )rM   loggingrH   
contextlibr   typingr   r   r   r   r@   rB   r;   Z2sentence_transformers.evaluation.SentenceEvaluatorr   Zsentence_transformers.utilr   Z)sentence_transformers.SentenceTransformerr	   	getLoggerrU   r9   r
   r"   r"   r"   r#   <module>   s   
