U
    <Af(                     @   s   d dl Z d dlZd dlZd dlmZ d dlmZmZmZm	Z	m
Z
 d dlZd dlmZmZmZ d dlmZ d dlmZ d dlmZ erd dlmZ eeZG d	d
 d
eZdS )    N)nullcontext)TYPE_CHECKINGDictListOptionalUnion)paired_cosine_distancespaired_euclidean_distancespaired_manhattan_distances)SentenceEvaluator)InputExample)SimilarityFunction)SentenceTransformerc                       s   e Zd ZdZdee ee ee eeeef  ee	e
e
ee	 d	 fdd	Zeee d
ddZddee	e	eeef dddZ  ZS )TripletEvaluatora  
    Evaluate a model based on a triplet: (sentence, positive_example, negative_example).
    Checks if distance(sentence, positive_example) < distance(sentence, negative_example).

    Example:
        ::

            from sentence_transformers import SentenceTransformer
            from sentence_transformers.evaluation import TripletEvaluator
            from datasets import load_dataset

            # Load a model
            model = SentenceTransformer('all-mpnet-base-v2')

            # Load a dataset with (anchor, positive, negative) triplets
            dataset = load_dataset("sentence-transformers/all-nli", "triplet", split="dev")

            # Initialize the TripletEvaluator using anchors, positives, and negatives
            triplet_evaluator = TripletEvaluator(
                anchors=dataset[:1000]["anchor"],
                positives=dataset[:1000]["positive"],
                negatives=dataset[:1000]["negative"],
                name="all-nli-dev",
            )
            results = triplet_evaluator(model)
            '''
            TripletEvaluator: Evaluating the model on the all-nli-dev dataset:
            Accuracy Cosine Distance:        95.60
            Accuracy Dot Product:            4.40
            Accuracy Manhattan Distance:     95.40
            Accuracy Euclidean Distance:     95.60
            '''
            print(triplet_evaluator.primary_metric)
            # => "all-nli-dev_max_accuracy"
            print(results[triplet_evaluator.primary_metric])
            # => 0.956
    N    FT)	anchors	positives	negativesmain_distance_functionname
batch_sizeshow_progress_bar	write_csvtruncate_dimc
           
         s   t    || _|| _|| _|| _|	| _t| jt| jks@tt| jt| jksXt|rdt	|nd| _
|| _|dkrt tjkpt tjk}|| _d|rd| nd d | _dddd	d
g| _|| _dS )a  
        Initializes a TripletEvaluator object.

        Args:
            anchors (List[str]): Sentences to check similarity to. (e.g. a query)
            positives (List[str]): List of positive sentences
            negatives (List[str]): List of negative sentences
            main_distance_function (Union[str, SimilarityFunction], optional):
                The distance function to use. If not specified, use cosine similarity,
                dot product, Euclidean, and Manhattan. Defaults to None.
            name (str): Name for the output. Defaults to "".
            batch_size (int): Batch size used to compute embeddings. Defaults to 16.
            show_progress_bar (bool): If true, prints a progress bar. Defaults to False.
            write_csv (bool): Write results to a CSV file. Defaults to True.
            truncate_dim (int, optional): The dimension to truncate sentence embeddings to.
                `None` uses the model's current truncation dimension. Defaults to None.
        NZtriplet_evaluation_r   z_results.csvepochstepsZaccuracy_cosinusaccuracy_manhattanaccuracy_euclidean)super__init__r   r   r   r   r   lenAssertionErrorr   r   r   loggergetEffectiveLevelloggingINFODEBUGr   csv_filecsv_headersr   )
selfr   r   r   r   r   r   r   r   r   	__class__ U/tmp/pip-unpacked-wheel-i7fohqg6/sentence_transformers/evaluation/TripletEvaluator.pyr!   ;   s"    
zTripletEvaluator.__init__)examplesc                 K   sV   g }g }g }|D ]4}| |jd  | |jd  | |jd  q| |||f|S )Nr         )appendZtexts)clsr0   kwargsr   r   r   Zexampler.   r.   r/   from_input_exampleso   s    z$TripletEvaluator.from_input_examplesr   )modeloutput_pathr   r   returnc              	   C   sN  |dkr0|dkrd| }q4d| d| d}nd}| j d k	rP|d| j  d7 }td	| j d
| d d}d\}}}	}
| j d krt n
|| j N |j| j| j| j	dd}|j| j
| j| j	dd}|j| j| j| j	dd}W 5 Q R X t||}t||}tj|| dd}tj|| dd}t||}t||}t||}t||}tt|D ]v}|d7 }|| || k r||d7 }|| || k r|d7 }|| || k r|	d7 }	|| || k rV|
d7 }
qV|| }|| }|	| }|
| }td|d  td|d  td|d  td|d  |d k	r| jrtj|| j}tj|st|dddd0}t|}|| j ||||||g W 5 Q R X n:t|dddd$}t|}||||||g W 5 Q R X tj dtj!dtj"dtj#di$| j%d| _&||||t'|||d }| (|| j}| )|| |S )!Nr7   z after epoch z
 in epoch z after z stepsr   z (truncated to )z.TripletEvaluator: Evaluating the model on the z dataset:r   )r   r   r   r   T)r   r   Zconvert_to_numpy)Zaxisr1   z#Accuracy Cosine Distance:   	{:.2f}d   z#Accuracy Dot Product:       	{:.2f}z#Accuracy Manhattan Distance:	{:.2f}z$Accuracy Euclidean Distance:	{:.2f}
wzutf-8)newlinemodeencodingacosine_accuracydot_accuracyeuclidean_accuracymanhattan_accuracymax_accuracy)rC   rD   rF   rE   rG   )*r   r$   infor   r   Ztruncate_sentence_embeddingsencoder   r   r   r   r   r   npsumr
   r	   ranger"   formatr   ospathjoinr)   isfileopencsvwriterwriterowr*   r   ZCOSINEZDOT_PRODUCTZ	EUCLIDEANZ	MANHATTANgetr   Zprimary_metricmaxZprefix_name_to_metricsZ store_metrics_in_model_card_data)r+   r8   r9   r   r   Zout_txtZnum_tripletsZnum_correct_cos_tripletsZnum_correct_dot_tripletsZnum_correct_manhattan_tripletsZnum_correct_euclidean_tripletsZembeddings_anchorsZembeddings_positivesZembeddings_negativesZpos_cos_distanceZneg_cos_distancesZpos_dot_distanceZneg_dot_distancesZpos_manhattan_distanceZneg_manhattan_distancesZpos_euclidean_distanceZneg_euclidean_distancesidxZaccuracy_cosZaccuracy_dotr   r   Zcsv_pathfrT   Zmetricsr.   r.   r/   __call__{   s    







 
     
zTripletEvaluator.__call__)Nr   r   FTN)Nr7   r7   )__name__
__module____qualname____doc__r   strr   r   r   intboolr!   classmethodr   r6   r   floatrZ   __classcell__r.   r.   r,   r/   r      s<   +      4        
r   )rS   r&   rN   
contextlibr   typingr   r   r   r   r   ZnumpyrJ   Zsklearn.metrics.pairwiser   r	   r
   Z2sentence_transformers.evaluation.SentenceEvaluatorr   Zsentence_transformers.readersr   Z*sentence_transformers.similarity_functionsr   Z)sentence_transformers.SentenceTransformerr   	getLoggerr[   r$   r   r.   r.   r.   r/   <module>   s   
