U
    <AfL1                     @   s   d dl Z d dlZd dlZd dlmZ d dlmZ d dlmZm	Z	m
Z
mZmZ d dlmZ d dlmZ ertd dlmZ eeZG dd	 d	eZdS )
    N)defaultdict)nullcontext)TYPE_CHECKINGDictListOptionalTuple)SentenceEvaluator)paraphrase_mining)SentenceTransformerc                       s   e Zd ZdZdeeef eeeef  eeeeef f ee	e	e	e	ee	eee
e	 d fddZddee	e	eeef dddZedd Z  ZS )ParaphraseMiningEvaluatora  
    Given a large set of sentences, this evaluator performs paraphrase (duplicate) mining and
    identifies the pairs with the highest similarity. It compare the extracted paraphrase pairs
    with a set of gold labels and computes the F1 score.

    Example:
        ::

            from datasets import load_dataset
            from sentence_transformers.SentenceTransformer import SentenceTransformer
            from sentence_transformers.evaluation import ParaphraseMiningEvaluator

            # Load a model
            model = SentenceTransformer('all-mpnet-base-v2')

            # Load the Quora Duplicates Mining dataset
            questions_dataset = load_dataset("sentence-transformers/quora-duplicates-mining", "questions", split="dev")
            duplicates_dataset = load_dataset("sentence-transformers/quora-duplicates-mining", "duplicates", split="dev")

            # Create a mapping from qid to question & a list of duplicates (qid1, qid2)
            qid_to_questions = dict(zip(questions_dataset["qid"], questions_dataset["question"]))
            duplicates = list(zip(duplicates_dataset["qid1"], duplicates_dataset["qid2"]))

            # Initialize the paraphrase mining evaluator
            paraphrase_mining_evaluator = ParaphraseMiningEvaluator(
                sentences_map=qid_to_questions,
                duplicates_list=duplicates,
                name="quora-duplicates-dev",
            )
            results = paraphrase_mining_evaluator(model)
            '''
            Paraphrase Mining Evaluation of the model on the quora-duplicates-dev dataset:
            Number of candidate pairs: 250564
            Average Precision: 56.51
            Optimal threshold: 0.8325
            Precision: 52.76
            Recall: 59.19
            F1: 55.79
            '''
            print(paraphrase_mining_evaluator.primary_metric)
            # => "quora-duplicates-dev_average_precision"
            print(results[paraphrase_mining_evaluator.primary_metric])
            # => 0.5650940787776353
    NF  順   d       T)sentences_mapduplicates_listduplicates_dictadd_transitive_closurequery_chunk_sizecorpus_chunk_size	max_pairstop_kshow_progress_bar
batch_sizename	write_csvtruncate_dimc                    s  t    g | _g | _| D ] \}}| j| | j| q|| _|	| _|
| _|| _	|| _
|| _|| _|| _|dk	r||n
tdd | _|dk	r|D ]4\}}||kr||krd| j| |< d| j| |< q|r| | j| _t }| jD ]X}| j| D ]H}||kr||kr| j| | s*| j| | r|tt||g qqt|| _|r\d| }d| d | _dd	d
ddddg| _|| _d| _dS )a	  
        Initializes the ParaphraseMiningEvaluator.

        Args:
            sentences_map (Dict[str, str]): A dictionary that maps sentence-ids to sentences.
                For example, sentences_map[id] => sentence.
            duplicates_list (List[Tuple[str, str]], optional): A list with id pairs [(id1, id2), (id1, id5)]
                that identifies the duplicates / paraphrases in the sentences_map. Defaults to None.
            duplicates_dict (Dict[str, Dict[str, bool]], optional): A default dictionary mapping [id1][id2]
                to true if id1 and id2 are duplicates. Must be symmetric, i.e., if [id1][id2] => True,
                then [id2][id1] => True. Defaults to None.
            add_transitive_closure (bool, optional): If true, it adds a transitive closure,
                i.e. if dup[a][b] and dup[b][c], then dup[a][c]. Defaults to False.
            query_chunk_size (int, optional): To identify the paraphrases, the cosine-similarity between
                all sentence-pairs will be computed. As this might require a lot of memory, we perform
                a batched computation. query_chunk_size sentences will be compared against up to
                corpus_chunk_size sentences. In the default setting, 5000 sentences will be grouped
                together and compared up-to against 100k other sentences. Defaults to 5000.
            corpus_chunk_size (int, optional): The corpus will be batched, to reduce the memory requirement.
                Defaults to 100000.
            max_pairs (int, optional): We will only extract up to max_pairs potential paraphrase candidates.
                Defaults to 500000.
            top_k (int, optional): For each query, we extract the top_k most similar pairs and add it to a sorted list.
                I.e., for one sentence we cannot find more than top_k paraphrases. Defaults to 100.
            show_progress_bar (bool, optional): Output a progress bar. Defaults to False.
            batch_size (int, optional): Batch size for computing sentence embeddings. Defaults to 16.
            name (str, optional): Name of the experiment. Defaults to "".
            write_csv (bool, optional): Write results to CSV file. Defaults to True.
            truncate_dim (Optional[int], optional): The dimension to truncate sentence embeddings to.
                `None` uses the model's current truncation dimension. Defaults to None.
        Nc                   S   s   t tS )N)r   bool r!   r!   ^/tmp/pip-unpacked-wheel-i7fohqg6/sentence_transformers/evaluation/ParaphraseMiningEvaluator.py<lambda>       z4ParaphraseMiningEvaluator.__init__.<locals>.<lambda>T_Zparaphrase_mining_evaluationz_results.csvepochsteps	precisionrecallf1	thresholdaverage_precision)super__init__	sentencesidsitemsappendr   r   r   r   r   r   r   r   r   
duplicatesr   setaddtuplesortedlentotal_num_duplicatescsv_filecsv_headersr   Zprimary_metric)selfr   r   r   r   r   r   r   r   r   r   r   r   r   idZsentenceid1id2Zpositive_key_pairskey1key2	__class__r!   r"   r.   ?   sR    /


z"ParaphraseMiningEvaluator.__init__r   )modeloutput_pathr&   r'   returnc              
   C   s  |dkr0|dkrd| }q4d| d| d}nd}| j d k	rP|d| j  d7 }td	| j d
| d | j d krzt n
|| j * t|| j| j| j	| j
| j| j| j}W 5 Q R X tdtt|  d }}d}	d }
 }}d}tt|D ]}|| \}}}| j| }| j| }|d7 }| j| | s@| j| | r|d7 }|| }|| j }d| | ||  }||7 }||
kr|}
|}|}|| d |t|d t|d  d  d }	q|| j }td|d  td|	 td|d  td|d  td|
d  |d k	r| jrtj|| j}tj|st|dddd4}t|}|| j  ||||||
|	|g W 5 Q R X n>t|dddd(}t|}||||||
|	|g W 5 Q R X ||
|||	d}| !|| j}| "|| |S )NrD   z after epoch z
 in epoch z after z stepsr   z (truncated to )z1Paraphrase Mining Evaluation of the model on the z dataset:zNumber of candidate pairs: r         zAverage Precision: {:.2f}r   zOptimal threshold: {:.4f}zPrecision: {:.2f}zRecall: {:.2f}zF1: {:.2f}
wzutf-8)newlinemodeencodinga)r,   r*   r(   r)   r+   )#r   loggerinfor   r   Ztruncate_sentence_embeddingsr
   r/   r   r   r   r   r   r   strr8   ranger0   r3   r9   minformatr   ospathjoinr:   isfileopencsvwriterwriterowr;   Zprefix_name_to_metricsZ store_metrics_in_model_card_data)r<   rE   rF   r&   r'   Zout_txtZ
pairs_listZ	n_extractZ	n_correctr+   Zbest_f1Zbest_recallZbest_precisionr,   idxZscoreijr>   r?   r(   r)   r*   Zcsv_pathfr]   Zmetricsr!   r!   r"   __call__   s    



0

$
"z"ParaphraseMiningEvaluator.__call__c                 C   s   t  }t|  D ]}||krt  }|| t| | }t|dkrr|d}||kr:|| || |  q:t|}tt|d D ]`}t|d t|D ]H}d| ||  || < d| ||  || < |||  |||  qqq| S )Nr   rJ   T)r4   listkeysr5   r8   popextendrT   )graphZnodes_visitedrP   Zconnected_subgraph_nodesZneighbor_nodes_queuenoder`   ra   r!   r!   r"   r      s&    


z0ParaphraseMiningEvaluator.add_transitive_closure)NNFr   r   r   r   Fr   r   TN)NrD   rD   )__name__
__module____qualname____doc__r   rS   r   r   r    intr   r.   floatrc   staticmethodr   __classcell__r!   r!   rB   r"   r      sP   0            
`        
Wr   )r\   loggingrW   collectionsr   
contextlibr   typingr   r   r   r   r   Z2sentence_transformers.evaluation.SentenceEvaluatorr	   Zsentence_transformers.utilr
   Z)sentence_transformers.SentenceTransformerr   	getLoggerrj   rQ   r   r!   r!   r!   r"   <module>   s   
