U
    <A·fL1  ã                   @   s’   d dl Z d dlZd dlZd dlmZ d dlmZ d dlmZm	Z	m
Z
mZmZ d dlmZ d dlmZ ertd dlmZ e e¡ZG dd	„ d	eƒZdS )
é    N)Údefaultdict)Únullcontext)ÚTYPE_CHECKINGÚDictÚListÚOptionalÚTuple)ÚSentenceEvaluator)Úparaphrase_mining)ÚSentenceTransformerc                       s”   e Zd ZdZdeeef eeeef  eeeeef f ee	e	e	e	ee	eee
e	 dœ‡ fdd„Zddee	e	eeef dœdd„Zedd„ ƒZ‡  ZS )ÚParaphraseMiningEvaluatoraù  
    Given a large set of sentences, this evaluator performs paraphrase (duplicate) mining and
    identifies the pairs with the highest similarity. It compare the extracted paraphrase pairs
    with a set of gold labels and computes the F1 score.

    Example:
        ::

            from datasets import load_dataset
            from sentence_transformers.SentenceTransformer import SentenceTransformer
            from sentence_transformers.evaluation import ParaphraseMiningEvaluator

            # Load a model
            model = SentenceTransformer('all-mpnet-base-v2')

            # Load the Quora Duplicates Mining dataset
            questions_dataset = load_dataset("sentence-transformers/quora-duplicates-mining", "questions", split="dev")
            duplicates_dataset = load_dataset("sentence-transformers/quora-duplicates-mining", "duplicates", split="dev")

            # Create a mapping from qid to question & a list of duplicates (qid1, qid2)
            qid_to_questions = dict(zip(questions_dataset["qid"], questions_dataset["question"]))
            duplicates = list(zip(duplicates_dataset["qid1"], duplicates_dataset["qid2"]))

            # Initialize the paraphrase mining evaluator
            paraphrase_mining_evaluator = ParaphraseMiningEvaluator(
                sentences_map=qid_to_questions,
                duplicates_list=duplicates,
                name="quora-duplicates-dev",
            )
            results = paraphrase_mining_evaluator(model)
            '''
            Paraphrase Mining Evaluation of the model on the quora-duplicates-dev dataset:
            Number of candidate pairs: 250564
            Average Precision: 56.51
            Optimal threshold: 0.8325
            Precision: 52.76
            Recall: 59.19
            F1: 55.79
            '''
            print(paraphrase_mining_evaluator.primary_metric)
            # => "quora-duplicates-dev_average_precision"
            print(results[paraphrase_mining_evaluator.primary_metric])
            # => 0.5650940787776353
    NFéˆ  é † é ¡ éd   é   Ú T)Úsentences_mapÚduplicates_listÚduplicates_dictÚadd_transitive_closureÚquery_chunk_sizeÚcorpus_chunk_sizeÚ	max_pairsÚtop_kÚshow_progress_barÚ
batch_sizeÚnameÚ	write_csvÚtruncate_dimc                    sŽ  t ƒ  ¡  g | _g | _| ¡ D ] \}}| j |¡ | j |¡ q|| _|	| _|
| _|| _	|| _
|| _|| _|| _|dk	r||n
tdd„ ƒ| _|dk	rÌ|D ]4\}}||kr–||kr–d| j| |< d| j| |< q–|rÞ|  | j¡| _tƒ }| jD ]X}| j| D ]H}||krø||krø| j| | s*| j| | rø| tt||gƒƒ¡ qøqêt|ƒ| _|r\d| }d| d | _dd	d
ddddg| _|| _d| _dS )a	  
        Initializes the ParaphraseMiningEvaluator.

        Args:
            sentences_map (Dict[str, str]): A dictionary that maps sentence-ids to sentences.
                For example, sentences_map[id] => sentence.
            duplicates_list (List[Tuple[str, str]], optional): A list with id pairs [(id1, id2), (id1, id5)]
                that identifies the duplicates / paraphrases in the sentences_map. Defaults to None.
            duplicates_dict (Dict[str, Dict[str, bool]], optional): A default dictionary mapping [id1][id2]
                to true if id1 and id2 are duplicates. Must be symmetric, i.e., if [id1][id2] => True,
                then [id2][id1] => True. Defaults to None.
            add_transitive_closure (bool, optional): If true, it adds a transitive closure,
                i.e. if dup[a][b] and dup[b][c], then dup[a][c]. Defaults to False.
            query_chunk_size (int, optional): To identify the paraphrases, the cosine-similarity between
                all sentence-pairs will be computed. As this might require a lot of memory, we perform
                a batched computation. query_chunk_size sentences will be compared against up to
                corpus_chunk_size sentences. In the default setting, 5000 sentences will be grouped
                together and compared up-to against 100k other sentences. Defaults to 5000.
            corpus_chunk_size (int, optional): The corpus will be batched, to reduce the memory requirement.
                Defaults to 100000.
            max_pairs (int, optional): We will only extract up to max_pairs potential paraphrase candidates.
                Defaults to 500000.
            top_k (int, optional): For each query, we extract the top_k most similar pairs and add it to a sorted list.
                I.e., for one sentence we cannot find more than top_k paraphrases. Defaults to 100.
            show_progress_bar (bool, optional): Output a progress bar. Defaults to False.
            batch_size (int, optional): Batch size for computing sentence embeddings. Defaults to 16.
            name (str, optional): Name of the experiment. Defaults to "".
            write_csv (bool, optional): Write results to CSV file. Defaults to True.
            truncate_dim (Optional[int], optional): The dimension to truncate sentence embeddings to.
                `None` uses the model's current truncation dimension. Defaults to None.
        Nc                   S   s   t tƒS )N)r   Úbool© r!   r!   ú^/tmp/pip-unpacked-wheel-i7fohqg6/sentence_transformers/evaluation/ParaphraseMiningEvaluator.pyÚ<lambda>   ó    z4ParaphraseMiningEvaluator.__init__.<locals>.<lambda>TÚ_Zparaphrase_mining_evaluationz_results.csvÚepochÚstepsÚ	precisionÚrecallÚf1Ú	thresholdÚaverage_precision)ÚsuperÚ__init__Ú	sentencesÚidsÚitemsÚappendr   r   r   r   r   r   r   r   r   Ú
duplicatesr   ÚsetÚaddÚtupleÚsortedÚlenÚtotal_num_duplicatesÚcsv_fileÚcsv_headersr   Zprimary_metric)Úselfr   r   r   r   r   r   r   r   r   r   r   r   r   ÚidZsentenceÚid1Úid2Zpositive_key_pairsÚkey1Úkey2©Ú	__class__r!   r"   r.   ?   sR    /

ÿþýý
z"ParaphraseMiningEvaluator.__init__éÿÿÿÿr   )ÚmodelÚoutput_pathr&   r'   Úreturnc              
   C   s  |dkr0|dkrd|› }q4d|› d|› d}nd}| j d k	rP|d| j › d7 }t d	| j› d
|› d¡ | j d krztƒ n
| | j ¡* t|| j| j| j	| j
| j| j| jƒ}W 5 Q R X t dtt|ƒƒ ¡ d }}d}	d }
 }}d}tt|ƒƒD ]Ä}|| \}}}| j| }| j| }|d7 }| j| | s@| j| | rô|d7 }|| }|| j }d| | ||  }||7 }||
krô|}
|}|}|| d |t|d t|ƒd ƒ d  d }	qô|| j }t d |d ¡¡ t d |	¡¡ t d |d ¡¡ t d |d ¡¡ t d |
d ¡¡ |d k	rÞ| jrÞtj || j¡}tj |¡s t|dddd4}t |¡}| | j ¡ | |||||
|	|g¡ W 5 Q R X n>t|dddd(}t |¡}| |||||
|	|g¡ W 5 Q R X ||
|||	dœ}|  !|| j¡}|  "||¡ |S )NrD   z after epoch z
 in epoch z after z stepsr   z (truncated to ú)z1Paraphrase Mining Evaluation of the model on the z datasetú:zNumber of candidate pairs: r   é   é   zAverage Precision: {:.2f}r   zOptimal threshold: {:.4f}zPrecision: {:.2f}zRecall: {:.2f}zF1: {:.2f}
Úwzutf-8)ÚnewlineÚmodeÚencodingÚa)r,   r*   r(   r)   r+   )#r   ÚloggerÚinfor   r   Ztruncate_sentence_embeddingsr
   r/   r   r   r   r   r   r   Ústrr8   Úranger0   r3   r9   ÚminÚformatr   ÚosÚpathÚjoinr:   ÚisfileÚopenÚcsvÚwriterÚwriterowr;   Zprefix_name_to_metricsZ store_metrics_in_model_card_data)r<   rE   rF   r&   r'   Zout_txtZ
pairs_listZ	n_extractZ	n_correctr+   Zbest_f1Zbest_recallZbest_precisionr,   ÚidxZscoreÚiÚjr>   r?   r(   r)   r*   Zcsv_pathÚfr]   Zmetricsr!   r!   r"   Ú__call__ž   s‚    
ø


0

$
"ûz"ParaphraseMiningEvaluator.__call__c                 C   sò   t ƒ }t|  ¡ ƒD ]Ú}||krt ƒ }| |¡ t| | ƒ}t|ƒdkrr| d¡}||kr:| |¡ | | | ¡ q:t|ƒ}tt|ƒd ƒD ]`}t|d t|ƒƒD ]H}d| ||  || < d| ||  || < | || ¡ | || ¡ q qŠq| S )Nr   rJ   T)r4   ÚlistÚkeysr5   r8   ÚpopÚextendrT   )ÚgraphZnodes_visitedrP   Zconnected_subgraph_nodesZneighbor_nodes_queueÚnoder`   ra   r!   r!   r"   r   õ   s&    


z0ParaphraseMiningEvaluator.add_transitive_closure)NNFr   r   r   r   Fr   r   TN)NrD   rD   )Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   rS   r   r   r    Úintr   r.   Úfloatrc   Ústaticmethodr   Ú__classcell__r!   r!   rB   r"   r      sP   0            ò
ò`     ÿ   
þWr   )r\   ÚloggingrW   Úcollectionsr   Ú
contextlibr   Útypingr   r   r   r   r   Z2sentence_transformers.evaluation.SentenceEvaluatorr	   Zsentence_transformers.utilr
   Z)sentence_transformers.SentenceTransformerr   Ú	getLoggerrj   rQ   r   r!   r!   r!   r"   Ú<module>   s   