U
    4Aft                     @   s   d Z ddlZddlZddlZddlZddlZddlmZ ddlm	Z	 e	
eZdd Zdd	 Zd
d Zdd Zdd Zdd Zd,ddZdd Zdd Zdd Zdd Zdd Zd-dd Zd.d"d#Zd$d% Zd&d' Zd(d) Zd*d+ ZdS )/a  
Very heavily inspired by the official evaluation script for SQuAD version 2.0 which was modified by XLNet authors to
update `find_best_threshold` scripts for SQuAD V2.0

In addition to basic functionality, we also compute additional statistics and plot precision-recall curves if an
additional na_prob.json file is provided. This file is expected to map question ID's to the model's predicted
probability that a question is unanswerable.
    N   )BasicTokenizer)loggingc                 C   s4   dd }dd }dd }dd }||||| S )	zALower text and remove punctuation, articles and extra whitespace.c                 S   s   t dt j}t |d| S )Nz\b(a|an|the)\b )recompileUNICODEsub)textregex r   K/tmp/pip-unpacked-wheel-zw5xktn0/transformers/data/metrics/squad_metrics.pyremove_articles'   s    z)normalize_answer.<locals>.remove_articlesc                 S   s   d |  S )Nr   )joinsplitr
   r   r   r   white_space_fix+   s    z)normalize_answer.<locals>.white_space_fixc                    s"   t tj d fdd| D S )N c                 3   s   | ]}| kr|V  qd S Nr   ).0chexcluder   r   	<genexpr>0   s      z8normalize_answer.<locals>.remove_punc.<locals>.<genexpr>)setstringpunctuationr   r   r   r   r   remove_punc.   s    
z%normalize_answer.<locals>.remove_puncc                 S   s   |   S r   )lowerr   r   r   r   r   2   s    znormalize_answer.<locals>.lowerr   )sr   r   r   r   r   r   r   normalize_answer$   s
    r    c                 C   s   | sg S t |  S r   )r    r   )r   r   r   r   
get_tokens8   s    r!   c                 C   s   t t| t|kS r   )intr    )a_golda_predr   r   r   compute_exact>   s    r%   c           	      C   s   t | }t |}t|t|@ }t| }t|dksHt|dkrTt||kS |dkr`dS d| t| }d| t| }d| | ||  }|S )Nr         ?   )r!   collectionsCountersumvalueslenr"   )	r#   r$   Z	gold_toksZ	pred_tokscommonZnum_sameZ	precisionZrecallf1r   r   r   
compute_f1B   s    r/   c                    s   i }i }| D ]x}|j }dd |jD }|s0dg}||krHtd|  q||  t fdd|D ||< t fdd|D ||< q||fS )zV
    Computes the exact and f1 scores from the examples and the model predictions
    c                 S   s    g | ]}t |d  r|d  qS r   )r    )r   Zanswerr   r   r   
<listcomp>[   s      z"get_raw_scores.<locals>.<listcomp>r   zMissing prediction for c                 3   s   | ]}t | V  qd S r   )r%   r   aZ
predictionr   r   r   f   s     z!get_raw_scores.<locals>.<genexpr>c                 3   s   | ]}t | V  qd S r   )r/   r1   r3   r   r   r   g   s     )qas_idanswersprintmax)examplespredsexact_scores	f1_scoresexampler4   Zgold_answersr   r3   r   get_raw_scoresR   s    r=   c                 C   sF   i }|   D ]4\}}|| |k}|r8t||  ||< q|||< q|S r   )itemsfloat)scoresna_probsqid_to_has_ansZna_prob_threshZ
new_scoresqidr   Zpred_nar   r   r   apply_no_ans_thresholdl   s    
rD   c              	      s   |sHt  }tddt   | fddt  | fd|fgS t |}tddt fdd|D  | fddtfdd|D  | fd|fgS d S )Nexact      Y@r.   totalc                 3   s   | ]} | V  qd S r   r   r   k)r:   r   r   r      s     z!make_eval_dict.<locals>.<genexpr>c                 3   s   | ]} | V  qd S r   r   rH   )r;   r   r   r      s     )r,   r(   OrderedDictr*   r+   )r:   r;   qid_listrG   r   )r:   r;   r   make_eval_dictw   s      rL   c                 C   s$   |D ]}|| | | d| < qd S )N_r   )	main_evalZnew_evalprefixrI   r   r   r   
merge_eval   s    rP   c                    s   t fddD }|}|}d}t  fddd}t|D ]R\}	}
|
|krPq>|
 rb||
 }n| |
 rpd}nd}||7 }||kr>|} |
 }q>d	\}}|D ],}
|
 sq|d
7 }|
|krq|||
 7 }qd| t| |d| | fS )Nc                 3   s   | ]} | sd V  qdS    Nr   rH   rB   r   r   r      s      z&find_best_thresh_v2.<locals>.<genexpr>        c                    s    |  S r   r   rI   rA   r   r   <lambda>       z%find_best_thresh_v2.<locals>.<lambda>keyr   )r   r   rR   rF   r&   r*   sorted	enumerater,   )r9   r@   rA   rB   
num_no_ans	cur_score
best_scorebest_threshrK   irC   diffZhas_ans_scoreZhas_ans_cntr   rA   rB   r   find_best_thresh_v2   s4    

rf   c                 C   s\   t ||||\}}}t ||||\}	}
}|| d< || d< |	| d< |
| d< || d< || d< d S )N
best_exactbest_exact_threshbest_f1best_f1_threshhas_ans_exact
has_ans_f1)rf   )rN   r9   	exact_rawf1_rawrA   rB   rg   exact_threshrk   ri   	f1_threshrl   r   r   r   find_all_best_thresh_v2   s    rq   c                    s   t fddD }|}|}d}t  fddd}t|D ]R\}	}
|
|krPq>|
 rb||
 }n| |
 rpd}nd}||7 }||kr>|} |
 }q>d	| t| |fS )
Nc                 3   s   | ]} | sd V  qdS rQ   r   rH   rS   r   r   r      s      z#find_best_thresh.<locals>.<genexpr>rT   c                    s    |  S r   r   rU   rV   r   r   rW      rX   z"find_best_thresh.<locals>.<lambda>rY   r[   r   rF   r\   )r9   r@   rA   rB   r_   r`   ra   rb   rK   rM   rC   rd   r   re   r   find_best_thresh   s$    

rr   c           
      C   sH   t ||||\}}t ||||\}}	|| d< || d< || d< |	| d< d S )Nrg   rh   ri   rj   )rr   )
rN   r9   rm   rn   rA   rB   rg   ro   ri   rp   r   r   r   find_all_best_thresh   s    rs   r&   c                 C   s   dd | D }dd |  D }dd |  D }|d krHdd |D }t| |\}}t||||}	t||||}
t|	|
}|rt|	|
|d}t||d |rt|	|
|d}t||d	 |rt|||||| |S )
Nc                 S   s   i | ]}|j t|jqS r   )r4   boolr5   )r   r<   r   r   r   
<dictcomp>   s      z"squad_evaluate.<locals>.<dictcomp>c                 S   s   g | ]\}}|r|qS r   r   r   r4   Z
has_answerr   r   r   r0      s      z"squad_evaluate.<locals>.<listcomp>c                 S   s   g | ]\}}|s|qS r   r   rv   r   r   r   r0      s      c                 S   s   i | ]
}|d qS )rT   r   rH   r   r   r   ru      s      )rK   ZHasAnsZNoAns)r>   r=   rD   rL   rP   rs   )r8   r9   Zno_answer_probsZno_answer_probability_thresholdZqas_id_to_has_answerZhas_answer_qidsZno_answer_qidsrE   r.   Zexact_thresholdZf1_thresholdZ
evaluationZhas_ans_evalZno_ans_evalr   r   r   squad_evaluate   s.       
rw   Fc                 C   sf  dd }t |d}d||}|| }|dkrT|rPtd|  d| d |S |t|  d	 }||\}	}
||\}}t|	t|kr|rtd
|	 d| d |S i }| D ]\}}|||< qd}||kr|| }||
kr|
| }|dkr|rtd |S d}||kr4|| }||
kr4|
| }|dkrR|rNtd |S |||d	  }|S )z;Project the tokenized prediction back to the original text.c                 S   sP   g }t  }t| D ](\}}|dkr&q||t|< || qd|}||fS )Nr   r   )r(   rJ   r^   r,   appendr   )r
   Zns_charsZns_to_s_maprc   cZns_textr   r   r   _strip_spaces  s    
z%get_final_text.<locals>._strip_spaces)do_lower_caser   r[   zUnable to find text: 'z' in ''rR   z*Length not equal after stripping spaces: 'z' vs 'NzCouldn't map start positionzCouldn't map end position)r   r   tokenizefindloggerinfor,   r>   )Z	pred_text	orig_textr{   verbose_loggingrz   	tokenizertok_textZstart_positionZend_positionZorig_ns_textZorig_ns_to_s_mapZtok_ns_textZtok_ns_to_s_mapZtok_s_to_ns_maprc   Z	tok_indexZorig_start_positionZns_start_positionZorig_end_positionZns_end_positionZoutput_textr   r   r   get_final_text   sL    








r   c                 C   sN   t t| dd dd}g }tt|D ]"}||kr6 qJ||| d  q&|S )z"Get the n-best logits from a list.c                 S   s   | d S )NrR   r   xr   r   r   rW   ^  rX   z#_get_best_indexes.<locals>.<lambda>TrZ   reverser   )r]   r^   ranger,   rx   )Zlogitsn_best_sizeZindex_and_scoreZbest_indexesrc   r   r   r   _get_best_indexes\  s    r   c                 C   s|   | sg S d}| D ]}|dks$||kr|}qg }d}| D ]$}t || }|| ||7 }q6g }|D ]}|||  qd|S )z,Compute softmax probability over raw logits.NrT   )mathexprx   )r@   Z	max_scoreZscoreZ
exp_scoresZ	total_sumr   probsr   r   r   _compute_softmaxh  s     

r   c           8      C   sl  |rt d|  |r(t d|  |r@|
r@t d|  tt}|D ]}||j | qNi }|D ]}|||j< qltdddddd	g}t	 }t	 }t	 }t
| D ]\}}|| }g }d
}d}d}d}t
|D ]*\}}||j }t|j|}t|j|}|
rL|jd |jd  } | |k rL| }|}|jd }|jd }|D ]}!|D ]}"|!t|jkrpqX|"t|jkrqX|!|jkrqX|"|jkrqX|j|!dsqX|"|!k rƐqX|"|! d }#|#|krqX||||!|"|j|! |j|" d qXqPq|
r,|||dd||d t|dd dd}tdddd	g}$i }%g }&|D ]}'t|&|krt qB||'j }|'jdkr|j|'j|'jd  }(|j|'j })|j|'j }*|j|)|*d  }+||(},|, },d|, },d|+}-t|,|-||	}.|.|%krq\d|%|.< nd}.d|%|.< |&|$|.|'j|'jd q\|
rd|%krf|&|$d||d t|&dkr|&d|$dddd |&s|&|$dddd t|&dk rt dg }/d}0|&D ]*}1|/|1j|1j  |0s|1j!r|1}0qt"|/}2g }3t
|&D ]F\}4}1t	 }5|1j!|5d< |2|4 |5d< |1j|5d< |1j|5d	< |3|5 qt|3dk rdt d|
s~|3d d ||j#< n<||0j |0j }6|6||j#< |6|krd||j#< n|0j!||j#< |3||j#< q|rt$|d}7|7%t&j'|ddd   W 5 Q R X |r.t$|d}7|7%t&j'|ddd   W 5 Q R X |rh|
rht$|d}7|7%t&j'|ddd   W 5 Q R X |S )!zHWrite final predictions to the json file and log-odds of null if needed.Writing predictions to: zWriting nbest to: zWriting null_log_odds to: PrelimPredictionfeature_indexstart_index	end_indexstart_logit	end_logit@B r   FrR   )r   r   r   r   r   c                 S   s   | j | j S r   )r   r   r   r   r   r   rW     rX   z,compute_predictions_logits.<locals>.<lambda>Tr   NbestPredictionr
   r   r   )r
   r   r   emptyrT   No valid predictionsNprobabilityw   indent
)(r   r   r(   defaultdictlistexample_indexrx   	unique_id
namedtuplerJ   r^   r   start_logits
end_logitsr,   tokenstoken_to_orig_maptoken_is_max_contextgetr]   r   r   r   
doc_tokensconvert_tokens_to_stringstripr   r   r   r   r   insert
ValueErrorr
   r   r4   openwritejsondumps)8all_examplesall_featuresall_resultsr   max_answer_lengthr{   output_prediction_fileoutput_nbest_fileoutput_null_log_odds_filer   version_2_with_negativeZnull_score_diff_thresholdr   example_index_to_featuresfeatureunique_id_to_resultresult_PrelimPredictionall_predictionsall_nbest_jsonscores_diff_jsonr   r<   featuresprelim_predictions
score_nullZmin_null_feature_indexZnull_start_logitZnull_end_logitr   Zstart_indexesZend_indexesZfeature_null_scorer   r   length_NbestPredictionseen_predictionsnbestpred
tok_tokensorig_doc_startorig_doc_endorig_tokensr   r   
final_texttotal_scoresbest_non_null_entryentryr   
nbest_jsonrc   output
score_diffwriterr   r   r   compute_predictions_logits  s"   
 





		 

	








"""r   c           8      C   s(  t ddddddg}t ddddg}td	|  t t}|D ]}||j | qFi }|D ]}|||j< qdt 	 }t 	 }t 	 }t
| D ]\}}|| }g }d
}t
|D ]\}}||j }|j}t||}t|D ]}t|	D ]}|j| }|j| } ||	 | }!|j|! }"|j|! }#| |jd kr6q|#|jd krHq|j| dsZq|#| k rfq|#|  d }$|$|kr~q|||| |#||"d qqqt|dd dd}i }%g }&|D ]}'t|&|kr q||'j }|j|'j|'jd  }(|j|'j })|j|'j }*|j|)|*d  }+||(},|, },d|,  },d|+}-t!|drZ|j"}.n|j#}.t$|,|-|.|}/|/|%kr|qd|%|/< |&||/|'j%|'j&d q|&s|&|dddd g }0d}1|&D ]"}2|0|2j%|2j&  |1s|2}1qt'|0}3g }4t
|&D ]F\}}2t 	 }5|2j(|5d< |3| |5d< |2j%|5d< |2j&|5d< |4|5 qt|4dk r\t)d|1dkrnt)d|}6|6||j*< |1j(||j*< |4||j*< qt+|d}7|7,t-j.|ddd  W 5 Q R X t+|d}7|7,t-j.|ddd  W 5 Q R X |
r$t+|d}7|7,t-j.|ddd  W 5 Q R X |S )z
    XLNet write prediction logic (more complex than Bert's). Write final predictions to the json file and log-odds of
    null if needed.

    Requires utils_squad_evaluate.py
    r   r   r   r   start_log_probend_log_probr   r
   r   r   rR   F)r   r   r   r   r   c                 S   s   | j | j S r   )r   r   r   r   r   r   rW     rX   z/compute_predictions_log_probs.<locals>.<lambda>Tr   r   r{   )r
   r   r   r   g    .Nr   r   r   r   r   r   )/r(   r   r   r   r   r   r   rx   r   rJ   r^   Z
cls_logitsminr   r   Zstart_top_indexr   Zend_top_indexZparagraph_lenr   r   r]   r,   r   r   r   r   r   r   r   r   r   r   hasattrr{   Zdo_lowercase_and_remove_accentr   r   r   r   r
   r   r4   r   r   r   r   )8r   r   r   r   r   r   r   r   Zstart_n_topZ	end_n_topr   r   r   r   r   r   r   r   r   r   r   r   r   r<   r   r   r   r   Zcur_null_scorerc   jr   r   Zj_indexr   r   r   r   r   r   r   r   r   r   r   r   r{   r   r   r   r   r   r   r   r   r   r   r   r   compute_predictions_log_probsN  s      










  








"""r   )N)Nr&   )F)__doc__r(   r   r   r   r   Zmodels.bertr   utilsr   Z
get_logger__name__r   r    r!   r%   r/   r=   rD   rL   rP   rf   rq   rr   rs   rw   r   r   r   r   r   r   r   r   r   <module>   s6   	

"


^ P