U
    <A·f(  ã                   @   s®   d dl Z d dlZd dlZd dlmZ d dlmZmZmZm	Z	m
Z
 d dlZd dlmZmZmZ d dlmZ d dlmZ d dlmZ erd dlmZ e e¡ZG d	d
„ d
eƒZdS )é    N)Únullcontext)ÚTYPE_CHECKINGÚDictÚListÚOptionalÚUnion)Úpaired_cosine_distancesÚpaired_euclidean_distancesÚpaired_manhattan_distances)ÚSentenceEvaluator)ÚInputExample)ÚSimilarityFunction)ÚSentenceTransformerc                       sŠ   e Zd ZdZdee ee ee eeeef  ee	e
e
ee	 dœ	‡ fdd	„Zeee d
œdd„ƒZddee	e	eeef dœdd„Z‡  ZS )ÚTripletEvaluatora  
    Evaluate a model based on a triplet: (sentence, positive_example, negative_example).
    Checks if distance(sentence, positive_example) < distance(sentence, negative_example).

    Example:
        ::

            from sentence_transformers import SentenceTransformer
            from sentence_transformers.evaluation import TripletEvaluator
            from datasets import load_dataset

            # Load a model
            model = SentenceTransformer('all-mpnet-base-v2')

            # Load a dataset with (anchor, positive, negative) triplets
            dataset = load_dataset("sentence-transformers/all-nli", "triplet", split="dev")

            # Initialize the TripletEvaluator using anchors, positives, and negatives
            triplet_evaluator = TripletEvaluator(
                anchors=dataset[:1000]["anchor"],
                positives=dataset[:1000]["positive"],
                negatives=dataset[:1000]["negative"],
                name="all-nli-dev",
            )
            results = triplet_evaluator(model)
            '''
            TripletEvaluator: Evaluating the model on the all-nli-dev dataset:
            Accuracy Cosine Distance:        95.60
            Accuracy Dot Product:            4.40
            Accuracy Manhattan Distance:     95.40
            Accuracy Euclidean Distance:     95.60
            '''
            print(triplet_evaluator.primary_metric)
            # => "all-nli-dev_max_accuracy"
            print(results[triplet_evaluator.primary_metric])
            # => 0.956
    NÚ é   FT)	ÚanchorsÚ	positivesÚ	negativesÚmain_distance_functionÚnameÚ
batch_sizeÚshow_progress_barÚ	write_csvÚtruncate_dimc
           
         sÎ   t ƒ  ¡  || _|| _|| _|| _|	| _t| jƒt| jƒks@t‚t| jƒt| jƒksXt‚|rdt	|ƒnd| _
|| _|dkr”t ¡ tjkp’t ¡ tjk}|| _d|r¨d| nd d | _dddd	d
g| _|| _dS )að  
        Initializes a TripletEvaluator object.

        Args:
            anchors (List[str]): Sentences to check similarity to. (e.g. a query)
            positives (List[str]): List of positive sentences
            negatives (List[str]): List of negative sentences
            main_distance_function (Union[str, SimilarityFunction], optional):
                The distance function to use. If not specified, use cosine similarity,
                dot product, Euclidean, and Manhattan. Defaults to None.
            name (str): Name for the output. Defaults to "".
            batch_size (int): Batch size used to compute embeddings. Defaults to 16.
            show_progress_bar (bool): If true, prints a progress bar. Defaults to False.
            write_csv (bool): Write results to a CSV file. Defaults to True.
            truncate_dim (int, optional): The dimension to truncate sentence embeddings to.
                `None` uses the model's current truncation dimension. Defaults to None.
        NZtriplet_evaluationÚ_r   z_results.csvÚepochÚstepsZaccuracy_cosinusÚaccuracy_manhattanÚaccuracy_euclidean)ÚsuperÚ__init__r   r   r   r   r   ÚlenÚAssertionErrorr   r   r   ÚloggerÚgetEffectiveLevelÚloggingÚINFOÚDEBUGr   Úcsv_fileÚcsv_headersr   )
Úselfr   r   r   r   r   r   r   r   r   ©Ú	__class__© úU/tmp/pip-unpacked-wheel-i7fohqg6/sentence_transformers/evaluation/TripletEvaluator.pyr!   ;   s"    
ÿzTripletEvaluator.__init__)Úexamplesc                 K   sV   g }g }g }|D ]4}|  |jd ¡ |  |jd ¡ |  |jd ¡ q| |||f|ŽS )Nr   é   é   )ÚappendZtexts)Úclsr0   Úkwargsr   r   r   Zexampler.   r.   r/   Úfrom_input_exampleso   s    z$TripletEvaluator.from_input_exampleséÿÿÿÿr   )ÚmodelÚoutput_pathr   r   Úreturnc              	   C   sN  |dkr0|dkrd|› }q4d|› d|› d}nd}| j d k	rP|d| j › d7 }t d	| j› d
|› d¡ d}d\}}}	}
| j d krŠtƒ n
| | j ¡N |j| j| j| j	dd}|j| j
| j| j	dd}|j| j| j| j	dd}W 5 Q R X t||ƒ}t||ƒ}tj|| dd}tj|| dd}t||ƒ}t||ƒ}t||ƒ}t||ƒ}tt|ƒƒD ]v}|d7 }|| || k r||d7 }|| || k r–|d7 }|| || k r°|	d7 }	|| || k rV|
d7 }
qV|| }|| }|	| }|
| }t d |d ¡¡ t d |d ¡¡ t d |d ¡¡ t d |d ¡¡ |d k	rð| jrðtj || j¡}tj |¡s¶t|dddd0}t |¡}| | j¡ | |||||g¡ W 5 Q R X n:t|dddd$}t |¡}| |||||g¡ W 5 Q R X tj dtj!dtj"dtj#di $| j%d¡| _&||||t'|||ƒd œ}|  (|| j¡}|  )||¡ |S )!Nr7   z after epoch z
 in epoch z after z stepsr   z (truncated to ú)z.TripletEvaluator: Evaluating the model on the z datasetú:r   )r   r   r   r   T)r   r   Zconvert_to_numpy)Zaxisr1   z#Accuracy Cosine Distance:   	{:.2f}éd   z#Accuracy Dot Product:       	{:.2f}z#Accuracy Manhattan Distance:	{:.2f}z$Accuracy Euclidean Distance:	{:.2f}
Úwzutf-8)ÚnewlineÚmodeÚencodingÚaÚcosine_accuracyÚdot_accuracyÚeuclidean_accuracyÚmanhattan_accuracyÚmax_accuracy)rC   rD   rF   rE   rG   )*r   r$   Úinfor   r   Ztruncate_sentence_embeddingsÚencoder   r   r   r   r   r   ÚnpÚsumr
   r	   Úranger"   Úformatr   ÚosÚpathÚjoinr)   ÚisfileÚopenÚcsvÚwriterÚwriterowr*   r   ZCOSINEZDOT_PRODUCTZ	EUCLIDEANZ	MANHATTANÚgetr   Zprimary_metricÚmaxZprefix_name_to_metricsZ store_metrics_in_model_card_data)r+   r8   r9   r   r   Zout_txtZnum_tripletsZnum_correct_cos_tripletsZnum_correct_dot_tripletsZnum_correct_manhattan_tripletsZnum_correct_euclidean_tripletsZembeddings_anchorsZembeddings_positivesZembeddings_negativesZpos_cos_distanceZneg_cos_distancesZpos_dot_distanceZneg_dot_distancesZpos_manhattan_distanceZneg_manhattan_distancesZpos_euclidean_distanceZneg_euclidean_distancesÚidxZaccuracy_cosZaccuracy_dotr   r   Zcsv_pathÚfrT   Zmetricsr.   r.   r/   Ú__call__{   s¶    
ûüüü





 
    ü û
ûzTripletEvaluator.__call__)Nr   r   FTN)Nr7   r7   )Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   Ústrr   r   r   ÚintÚboolr!   Úclassmethodr   r6   r   ÚfloatrZ   Ú__classcell__r.   r.   r,   r/   r      s<   +      öö4     ÿ   
þr   )rS   r&   rN   Ú
contextlibr   Útypingr   r   r   r   r   ZnumpyrJ   Zsklearn.metrics.pairwiser   r	   r
   Z2sentence_transformers.evaluation.SentenceEvaluatorr   Zsentence_transformers.readersr   Z*sentence_transformers.similarity_functionsr   Z)sentence_transformers.SentenceTransformerr   Ú	getLoggerr[   r$   r   r.   r.   r.   r/   Ú<module>   s   