U
    <Af                     @   s   d dl Z d dlZd dlZd dlmZ d dlmZmZmZm	Z	m
Z
 d dlZd dlmZ erdd dlmZ eeZG dd deZdS )    N)nullcontext)TYPE_CHECKINGDictListOptionalTuple)SentenceEvaluator)SentenceTransformerc                	       s   e Zd ZdZdeeeef  deeeef  eee	e
e d fdd	Zddeeeeeef dddZeedddZ  ZS )MSEEvaluatorFromDataFrameu  
    Computes the mean squared error (x100) between the computed sentence embedding and some target sentence embedding.

    Args:
        dataframe (List[Dict[str, str]]): It must have the following format. Rows contains different, parallel sentences.
            Columns are the respective language codes::

            [{'en': 'My sentence in English', 'es': 'Oración en español', 'fr': 'Phrase en français'...},
             {'en': 'My second sentence', ...}]
        teacher_model (SentenceTransformer): The teacher model used to compute the sentence embeddings.
        combinations (List[Tuple[str, str]]): Must be of the format ``[('en', 'es'), ('en', 'fr'), ...]``.
            First entry in a tuple is the source language. The sentence in the respective language will be fetched from
            the dataframe and passed to the teacher model. Second entry in a tuple the the target language. Sentence
            will be fetched from the dataframe and passed to the student model
        batch_size (int, optional): The batch size to compute sentence embeddings. Defaults to 8.
        name (str, optional): The name of the evaluator. Defaults to "".
        write_csv (bool, optional): Whether to write the results to a CSV file. Defaults to True.
        truncate_dim (Optional[int], optional): The dimension to truncate sentence embeddings to. If None, uses the model's
            current truncation dimension. Defaults to None.
        TNr	   )	dataframeteacher_modelcombinations
batch_sizename	write_csvtruncate_dimc              	      sV  t    || _|| _|| _|r(d| }d| d | _ddg| _d| _|| _|| _	i | _
td t }| jD ]\}	}
g }g }|D ]N}||	  dkr||
  dkr|||	  |||	  |||
  q||f| j
|	|
f< | jd	|	|
 qnt|}| j	d krt n
|| j	 |j|| jd
}W 5 Q R X dd t||D | _d S )N_Zmse_evaluationz_results.csvepochstepsnegative_msezCompute teacher embeddingsr   z{}-{}r   c                 S   s   i | ]\}}||qS  r   ).0sentZembr   r   ^/tmp/pip-unpacked-wheel-i7fohqg6/sentence_transformers/evaluation/MSEEvaluatorFromDataFrame.py
<dictcomp>T   s      z6MSEEvaluatorFromDataFrame.__init__.<locals>.<dictcomp>)super__init__r   r   r   csv_filecsv_headersZprimary_metricr   r   dataloggerinfosetstripaddappendformatlistr   truncate_sentence_embeddingsencodezipteacher_embeddings)selfr   r   r   r   r   r   r   Zall_source_sentencessrc_langtrg_langsrc_sentencestrg_sentencesrowZall_src_embeddings	__class__r   r   r   '   s<    



 z"MSEEvaluatorFromDataFrame.__init__)modeloutput_pathr   r   returnc              
      s  |   g } jD ]\}} j||f \}}	t fdd|D }
 jd krTt n
| j t|j|	 j	d}W 5 Q R X |
| d 
 }|d9 }|| td j|| td| q|d k	rN jrNtj| j}tj|}t|d|r
d	nd
dd4}t|}|s2| j |||g|  W 5 Q R X dt
|  i} | j} || |S )Nc                    s   g | ]} j | qS r   )r.   )r   r   r/   r   r   
<listcomp>_   s     z6MSEEvaluatorFromDataFrame.__call__.<locals>.<listcomp>r      d   z%MSE evaluation on {} dataset - {}-{}:zMSE (*100):	{:4f}r   awzutf-8)newlinemodeencodingr   )evalr   r"   npZasarrayr   r   r+   r,   r   Zmeanr(   r#   r$   r)   r   r   ospathjoinr    isfileopencsvwriterwriterowr!   itemZprefix_name_to_metricsZ store_metrics_in_model_card_data)r/   r8   r9   r   r   Z
mse_scoresr0   r1   r2   r3   Zsrc_embeddingsZtrg_embeddingsZmseZcsv_pathZoutput_file_existsfrL   Zmetricsr   r;   r   __call__V   s0     

z"MSEEvaluatorFromDataFrame.__call__)r:   c                 C   s   dS )NzKnowledge Distillationr   r;   r   r   r   descriptionz   s    z%MSEEvaluatorFromDataFrame.description)r   r   TN)Nr7   r7   )__name__
__module____qualname____doc__r   r   strr   intboolr   r   floatrP   propertyrQ   __classcell__r   r   r5   r   r
      s4       0        
$r
   )rK   loggingrF   
contextlibr   typingr   r   r   r   r   ZnumpyrE   Z2sentence_transformers.evaluation.SentenceEvaluatorr   Z)sentence_transformers.SentenceTransformerr	   	getLoggerrR   r#   r
   r   r   r   r   <module>   s   
