U
    <Af'2                     @   s   d dl Z d dlZd dlZd dlmZ d dlmZmZmZm	Z	 d dl
Zd dlZd dlZd dlmZmZ d dlmZ d dlmZ erd dlmZ eeZG dd	 d	eZdS )
    N)nullcontext)TYPE_CHECKINGCallableDictOptional)average_precision_score
ndcg_score)SentenceEvaluator)cos_sim)SentenceTransformerc                       s   e Zd ZdZdddedddddf	eeeee	j
e	j
ge	j
f eeeee ee d	 fd	d
Zddeeeeeef dddZdd Zdd Zdd Z  ZS )RerankingEvaluatora  
    This class evaluates a SentenceTransformer model for the task of re-ranking.

    Given a query and a list of documents, it computes the score [query, doc_i] for all possible
    documents and sorts them in decreasing order. Then, MRR@10, NDCG@10 and MAP is compute to measure the quality of the ranking.

    Args:
        samples (list): A list of dictionaries, where each dictionary represents a sample and has the following keys:
            - 'query': The search query.
            - 'positive': A list of positive (relevant) documents.
            - 'negative': A list of negative (irrelevant) documents.
        at_k (int, optional): Only consider the top k most similar documents to each query for the evaluation. Defaults to 10.
        name (str, optional): Name of the evaluator. Defaults to "".
        write_csv (bool, optional): Write results to CSV file. Defaults to True.
        similarity_fct (Callable[[torch.Tensor, torch.Tensor], torch.Tensor], optional): Similarity function between sentence embeddings. By default, cosine similarity. Defaults to cos_sim.
        batch_size (int, optional): Batch size to compute sentence embeddings. Defaults to 64.
        show_progress_bar (bool, optional): Show progress bar when computing embeddings. Defaults to False.
        use_batched_encoding (bool, optional): Whether or not to encode queries and documents in batches for greater speed, or 1-by-1 to save memory. Defaults to True.
        truncate_dim (Optional[int], optional): The dimension to truncate sentence embeddings to. `None` uses the model's current truncation dimension. Defaults to None.
        mrr_at_k (Optional[int], optional): Deprecated parameter. Please use `at_k` instead. Defaults to None.
    
    T@   FN)	at_kname	write_csvsimilarity_fct
batch_sizeshow_progress_baruse_batched_encodingtruncate_dimmrr_at_kc                    s   t    || _|| _|
d k	r8td|
 d |
| _n|| _|| _|| _|| _	|| _
|	| _t| jtrxt| j | _dd | jD | _d|rd| nd d| j d	 | _d
ddd| jd| jg| _|| _d| _d S )Nz?The `mrr_at_k` parameter has been deprecated; please use `at_k=z
` instead.c                 S   s0   g | ](}t |d  dkrt |d dkr|qS )positiver   negativelen.0sample r    W/tmp/pip-unpacked-wheel-i7fohqg6/sentence_transformers/evaluation/RerankingEvaluator.py
<listcomp>M   s      z/RerankingEvaluator.__init__.<locals>.<listcomp>r   _r   z
_results_@z.csvepochstepsZMAPzMRR@{}zNDCG@{}map)super__init__samplesr   loggerwarningr   r   r   r   r   r   
isinstancedictlistvaluescsv_fileformatcsv_headersr   Zprimary_metric)selfr)   r   r   r   r   r   r   r   r   r   	__class__r    r!   r(   ,   s4    
$

zRerankingEvaluator.__init__r   )modeloutput_pathr$   r%   returnc                 C   s  |dkr0|dkrd| }q4d| d| d}nd}| j dk	rP|d| j  d	7 }td
| j d| d | |}|d }|d }|d }	dd | jD }
dd | jD }tdt| jt	|
t
|
t|
t	|t
|t| td|d  td| j|d  td| j|	d  |dk	r| jrtj|| j}tj|}t|d|rtdnddd6}t|}|s|| j ||||||	g W 5 Q R X d|d| j |d| j |	i}| || j}| || |S )a  
        Evaluates the model on the dataset and returns the evaluation metrics.

        Args:
            model (SentenceTransformer): The SentenceTransformer model to evaluate.
            output_path (str, optional): The output path to write the results. Defaults to None.
            epoch (int, optional): The current epoch number. Defaults to -1.
            steps (int, optional): The current step number. Defaults to -1.

        Returns:
            Dict[str, float]: A dictionary containing the evaluation metrics.
        r6   z after epoch z
 in epoch z after z stepsr   Nz (truncated to )z0RerankingEvaluator: Evaluating the model on the z dataset:r&   mrrndcgc                 S   s   g | ]}t |d  qS )r   r   r   r    r    r!   r"   }   s     z/RerankingEvaluator.__call__.<locals>.<listcomp>c                 S   s   g | ]}t |d  qS )r   r   r   r    r    r!   r"   ~   s     zmQueries: {} 	 Positives: Min {:.1f}, Mean {:.1f}, Max {:.1f} 	 Negatives: Min {:.1f}, Mean {:.1f}, Max {:.1f}zMAP: {:.2f}d   zMRR@{}: {:.2f}zNDCG@{}: {:.2f}awzutf-8)newlinemodeencodingzmrr@zndcg@)r   r*   infor   compute_metricesr)   r1   r   npminmeanmaxr   r   ospathjoinr0   isfileopencsvwriterwriterowr2   Zprefix_name_to_metricsZ store_metrics_in_model_card_data)r3   r7   r8   r$   r%   Zout_txtZscoresmean_apmean_mrr	mean_ndcgZnum_positivesZnum_negativesZcsv_pathZoutput_file_existsfrP   Zmetricsr    r    r!   __call__\   s\    


 
 
 zRerankingEvaluator.__call__c                 C   s   | j r| |S | |S )a  
        Computes the evaluation metrics for the given model.

        Args:
            model (SentenceTransformer): The SentenceTransformer model to compute metrics for.

        Returns:
            Dict[str, float]: A dictionary containing the evaluation metrics.
        )r   compute_metrices_batchedcompute_metrices_individual)r3   r7   r    r    r!   rE      s    z#RerankingEvaluator.compute_metricesc              	   C   s  g }g }g }| j dkrt n
|| j j |jdd | jD d| j| jd}g }| jD ] }||d  ||d  qV|j|d| j| jd}W 5 Q R X d\}	}
| jD ]"}||	 }|	d	7 }	t|d }t|d }||
|
| |  }|
|| 7 }
|d
ks|d
krq| 	||}t|j
d	kr,|d
 }t| }|  }d	g| d
g|  }d
}t|d
| j D ]&\}}|| rnd	|d	  } qqn|| |t|g|g| jd |t|| qt|}t|}t|}|||dS )aE  
        Computes the evaluation metrics in a batched way, by batching all queries and all documents together.

        Args:
            model (SentenceTransformer): The SentenceTransformer model to compute metrics for.

        Returns:
            Dict[str, float]: A dictionary containing the evaluation metrics.
        Nc                 S   s   g | ]}|d  qS )queryr    r   r    r    r!   r"      s     z?RerankingEvaluator.compute_metrices_batched.<locals>.<listcomp>TZconvert_to_tensorr   r   r   r   )r   r      r   kr&   r<   r=   )r   r   truncate_sentence_embeddingsencoder)   r   r   extendr   r   shapetorchargsortcputolist	enumerater   appendr   r   rF   rH   )r3   r7   all_mrr_scoresall_ndcg_scoresall_ap_scoresZall_query_embsZall_docsr   Zall_docs_embsZ	query_idxZdocs_idxinstance	query_embnum_posZnum_negdocs_embpred_scorespred_scores_argsortis_relevant	mrr_scorerankindexrR   rS   rT   r    r    r!   rW      s`    

   





z+RerankingEvaluator.compute_metrices_batchedc              
   C   s  g }g }g }t j | j| j ddD ]`}|d }t|d }t|d }t|dks"t|dkrbq"|| }	dgt| dgt|  }
| jdkrt n
|| j0 |j|gd	| j	d
d}|j|	d	| j	d
d}W 5 Q R X | 
||}t|jdkr|d }t| }|  }d}t|d| j D ]&\}}|
| r(d|d  } qPq(|| |t|
g|g| jd |t|
| q"t|}t|}t|}|||dS )aO  
        Computes the evaluation metrics individually by embedding every (query, positive, negative) tuple individually.

        Args:
            model (SentenceTransformer): The SentenceTransformer model to compute metrics for.

        Returns:
            Dict[str, float]: A dictionary containing the evaluation metrics.
        ZSamples)disabledescrY   r   r   r   r[   NTFrZ   r\   r^   )tqdmr)   r   r.   r   r   r   r_   r`   r   r   rb   rc   rd   re   rf   rg   r   rh   r   r   rF   rH   )r3   r7   ri   rj   rk   rl   rY   r   r   Zdocsrr   rm   ro   rp   rq   rs   rt   ru   rR   rS   rT   r    r    r!   rX      sR    
      





z.RerankingEvaluator.compute_metrices_individual)Nr6   r6   )__name__
__module____qualname____doc__r
   intstrboolr   rc   ZTensorr   r(   r   floatrV   rE   rW   rX   __classcell__r    r    r4   r!   r      sD   1        
GJr   )rO   loggingrJ   
contextlibr   typingr   r   r   r   ZnumpyrF   rc   rx   Zsklearn.metricsr   r   Z2sentence_transformers.evaluation.SentenceEvaluatorr	   Zsentence_transformers.utilr
   Z)sentence_transformers.SentenceTransformerr   	getLoggerry   r*   r   r    r    r    r!   <module>   s   
