U
    4AfZ                     @   s  d Z ddlZddlZddlmZ ddlmZ ddlmZm	Z	m
Z
 ddlmZ ddlmZmZ d	d
lmZmZmZ e r~ddlZeeZdZd0e
ee df ee	e dddZe redfejjee	e ejjdddZd1ee ee	e dddZG dd deZ G dd deZ!G dd deZ"G dd de"Z#G dd deZ$G dd  d eZ%G d!d" d"eZ&G d#d$ d$eZ'G d%d& d&eZ(G d'd( d(eZ)G d)d* d*eZ*d+dd+d+d	d+d+d+d+d,	Z+e$e"e#e!e%e&e'e(e)e*d-
Z,d.d.d.d.d.d/d.d.d.d.d-
Z-dS )2zGLUE processors and helpers    N)asdict)Enum)ListOptionalUnion   )PreTrainedTokenizer)is_tf_availablelogging   )DataProcessorInputExampleInputFeaturesu  This {0} will be removed from the library soon, preprocessing should be handled with the 🤗 Datasets library. You can have a look at this example script for pointers: https://github.com/huggingface/transformers/blob/main/examples/pytorch/text-classification/run_glue.pyztf.data.Dataset)examples	tokenizer
max_lengthc                 C   sZ   t tdt t rFt| tjj	rF|dkr6t
dt| |||dS t| |||||dS )a=  
    Loads a data file into a list of `InputFeatures`

    Args:
        examples: List of `InputExamples` or `tf.data.Dataset` containing the examples.
        tokenizer: Instance of a tokenizer that will tokenize the examples
        max_length: Maximum example length. Defaults to the tokenizer's max_len
        task: GLUE task
        label_list: List of labels. Can be obtained from the processor using the `processor.get_labels()` method
        output_mode: String indicating the output mode. Either `regression` or `classification`

    Returns:
        If the `examples` input is a `tf.data.Dataset`, will return a `tf.data.Dataset` containing the task-specific
        features. If the input is a list of `InputExamples`, will return a list of task-specific `InputFeatures` which
        can be fed to the model.

    functionNzWWhen calling glue_convert_examples_to_features from TF, the task parameter is required.r   task)r   r   
label_listoutput_mode)warningswarnDEPRECATION_WARNINGformatFutureWarningr	   
isinstancetfdataDataset
ValueError%_tf_glue_convert_examples_to_features"_glue_convert_examples_to_features)r   r   r   r   r   r    r#   E/tmp/pip-unpacked-wheel-zw5xktn0/transformers/data/processors/glue.py!glue_convert_examples_to_features)   s         r%   )r   r   r   returnc                    s   t |  fdd| D } t| |||d |dkr:tjntj} fdd}|j}tjj|dd |D |fd	d |D t	g fS )
zb
        Returns:
            A `tf.data.Dataset` containing the task-specific features.

        c                    s   g | ]}   |qS r#   )Ztfds_mapget_example_from_tensor_dict.0example)	processorr#   r$   
<listcomp>Z   s     z9_tf_glue_convert_examples_to_features.<locals>.<listcomp>r   sts-bc                  3   s8    D ].} dd t |  D }|d}||fV  qd S )Nc                 S   s   i | ]\}}|d k	r||qS Nr#   )r)   kvr#   r#   r$   
<dictcomp>`   s       zF_tf_glue_convert_examples_to_features.<locals>.gen.<locals>.<dictcomp>label)r   itemspop)exdr2   )featuresr#   r$   gen^   s    
z2_tf_glue_convert_examples_to_features.<locals>.genc                 S   s   i | ]}|t jqS r#   )r   Zint32r)   r/   r#   r#   r$   r1   h   s      z9_tf_glue_convert_examples_to_features.<locals>.<dictcomp>c                 S   s   i | ]}|t d gqS r.   )r   TensorShaper9   r#   r#   r$   r1   i   s      )
glue_processorsr%   r   Zfloat32Zint64Zmodel_input_namesr   r   Zfrom_generatorr:   )r   r   r   r   Z
label_typer8   Zinput_namesr#   )r7   r+   r$   r!   N   s    
r!   c                    sd  |d kr|j }|d k	rlt|  }|d krF| }td| d|  d krlt| td d|  dd t|D ttt	t
d f dfddfd	d
| D }|dd
 | D |ddd g }tt| D ]8 fdd D }	tf |	d| i}
||
 qt| d d D ]:\}td td|j  td|   q$|S )NzUsing label list z
 for task zUsing output mode c                 S   s   i | ]\}}||qS r#   r#   )r)   ir2   r#   r#   r$   r1      s      z6_glue_convert_examples_to_features.<locals>.<dictcomp>)r*   r&   c                    s>   | j d krd S dkr  | j  S dkr2t| j S td S )Nclassification
regression)r2   floatKeyError)r*   )	label_mapr   r#   r$   label_from_example   s    


z>_glue_convert_examples_to_features.<locals>.label_from_examplec                    s   g | ]} |qS r#   r#   r(   )rB   r#   r$   r,      s     z6_glue_convert_examples_to_features.<locals>.<listcomp>c                 S   s   g | ]}|j |jfqS r#   )text_atext_br(   r#   r#   r$   r,      s     r   T)r   paddingZ
truncationc                    s   i | ]}| |  qS r#   r#   r9   )batch_encodingr<   r#   r$   r1      s      r2      z*** Example ***zguid: z
features: )Zmodel_max_lengthr;   
get_labelsloggerinfoglue_output_modes	enumerater   r   intr?   rangelenr   appendguid)r   r   r   r   r   r   r+   labelsr7   inputsZfeaturer*   r#   )rF   r<   rB   rA   r   r$   r"   m   s:    
 	
r"   c                   @   s   e Zd ZdZdZdS )
OutputModer=   r>   N)__name__
__module____qualname__r=   r>   r#   r#   r#   r$   rT      s   rT   c                       sP   e Zd ZdZ fddZdd Zdd Zdd	 Zd
d Zdd Z	dd Z
  ZS )MrpcProcessorz/Processor for the MRPC data set (GLUE version).c                    s$   t  j|| ttdt d S Nr+   super__init__r   r   r   r   r   selfargskwargs	__class__r#   r$   r\      s    zMrpcProcessor.__init__c                 C   s>   t |d  |d  d|d  dt|d  S See base class.idxZ	sentence1utf-8Z	sentence2r2   r   Znumpydecodestrr^   Ztensor_dictr#   r#   r$   r'      s    
z*MrpcProcessor.get_example_from_tensor_dictc                 C   s6   t dtj|d  | | tj|ddS )rd   zLOOKING AT 	train.tsvtrain)rI   rJ   ospathjoin_create_examples	_read_tsvr^   data_dirr#   r#   r$   get_train_examples   s    z MrpcProcessor.get_train_examplesc                 C   s   |  | tj|ddS rd   zdev.tsvdevrp   rq   rm   rn   ro   rr   r#   r#   r$   get_dev_examples   s    zMrpcProcessor.get_dev_examplesc                 C   s   |  | tj|ddS rd   ztest.tsvtestrw   rr   r#   r#   r$   get_test_examples   s    zMrpcProcessor.get_test_examplesc                 C   s   ddgS rd   01r#   r^   r#   r#   r$   rH      s    zMrpcProcessor.get_labelsc           
   	   C   sl   g }t |D ]Z\}}|dkrq| d| }|d }|d }|dkrHdn|d }	|t||||	d q|S )5Creates examples for the training, dev and test sets.r   -r      rz   NrQ   rC   rD   r2   rL   rP   r   
r^   linesset_typer   r<   linerQ   rC   rD   r2   r#   r#   r$   rp      s    zMrpcProcessor._create_examplesrU   rV   rW   __doc__r\   r'   rt   rx   r{   rH   rp   __classcell__r#   r#   ra   r$   rX      s   	rX   c                       sP   e Zd ZdZ fddZdd Zdd Zdd	 Zd
d Zdd Z	dd Z
  ZS )MnliProcessorz3Processor for the MultiNLI data set (GLUE version).c                    s$   t  j|| ttdt d S rY   rZ   r]   ra   r#   r$   r\      s    zMnliProcessor.__init__c                 C   s>   t |d  |d  d|d  dt|d  S )rd   re   Zpremiserf   Z
hypothesisr2   rg   rj   r#   r#   r$   r'      s    
z*MnliProcessor.get_example_from_tensor_dictc                 C   s   |  | tj|ddS rd   rk   rl   rw   rr   r#   r#   r$   rt      s    z MnliProcessor.get_train_examplesc                 C   s   |  | tj|ddS )rd   zdev_matched.tsvZdev_matchedrw   rr   r#   r#   r$   rx      s    zMnliProcessor.get_dev_examplesc                 C   s   |  | tj|ddS )rd   ztest_matched.tsvZtest_matchedrw   rr   r#   r#   r$   r{      s    zMnliProcessor.get_test_examplesc                 C   s
   dddgS )rd   Zcontradiction
entailmentZneutralr#   r   r#   r#   r$   rH      s    zMnliProcessor.get_labelsc           
   	   C   sr   g }t |D ]`\}}|dkrq| d|d  }|d }|d }|drNdn|d }	|t||||	d q|S )	r   r   r      	   rz   Nr   )rL   
startswithrP   r   r   r#   r#   r$   rp      s    zMnliProcessor._create_examplesr   r#   r#   ra   r$   r      s   	r   c                       s0   e Zd ZdZ fddZdd Zdd Z  ZS )MnliMismatchedProcessorz>Processor for the MultiNLI Mismatched data set (GLUE version).c                    s$   t  j|| ttdt d S rY   rZ   r]   ra   r#   r$   r\   	  s    z MnliMismatchedProcessor.__init__c                 C   s   |  | tj|ddS )rd   zdev_mismatched.tsvZdev_mismatchedrw   rr   r#   r#   r$   rx     s    z(MnliMismatchedProcessor.get_dev_examplesc                 C   s   |  | tj|ddS )rd   ztest_mismatched.tsvZtest_mismatchedrw   rr   r#   r#   r$   r{     s    z)MnliMismatchedProcessor.get_test_examples)rU   rV   rW   r   r\   rx   r{   r   r#   r#   ra   r$   r     s   r   c                       sP   e Zd ZdZ fddZdd Zdd Zdd	 Zd
d Zdd Z	dd Z
  ZS )ColaProcessorz/Processor for the CoLA data set (GLUE version).c                    s$   t  j|| ttdt d S rY   rZ   r]   ra   r#   r$   r\     s    zColaProcessor.__init__c                 C   s0   t |d  |d  ddt|d  S rd   re   sentencerf   Nr2   rg   rj   r#   r#   r$   r'     s    
z*ColaProcessor.get_example_from_tensor_dictc                 C   s   |  | tj|ddS r   rw   rr   r#   r#   r$   rt   &  s    z ColaProcessor.get_train_examplesc                 C   s   |  | tj|ddS ru   rw   rr   r#   r#   r$   rx   *  s    zColaProcessor.get_dev_examplesc                 C   s   |  | tj|ddS ry   rw   rr   r#   r#   r$   r{   .  s    zColaProcessor.get_test_examplesc                 C   s   ddgS r|   r#   r   r#   r#   r$   rH   2  s    zColaProcessor.get_labelsc              	   C   sz   |dk}|r|dd }|r dnd}g }t |D ]D\}}| d| }|| }	|rVdn|d }
|t||	d|
d q0|S )r   rz   r   Nr   r   r   r   )r^   r   r   	test_mode
text_indexr   r<   r   rQ   rC   r2   r#   r#   r$   rp   6  s    zColaProcessor._create_examplesr   r#   r#   ra   r$   r     s   	r   c                       sP   e Zd ZdZ fddZdd Zdd Zdd	 Zd
d Zdd Z	dd Z
  ZS )Sst2Processorz0Processor for the SST-2 data set (GLUE version).c                    s$   t  j|| ttdt d S rY   rZ   r]   ra   r#   r$   r\   H  s    zSst2Processor.__init__c                 C   s0   t |d  |d  ddt|d  S r   rg   rj   r#   r#   r$   r'   L  s    
z*Sst2Processor.get_example_from_tensor_dictc                 C   s   |  | tj|ddS r   rw   rr   r#   r#   r$   rt   U  s    z Sst2Processor.get_train_examplesc                 C   s   |  | tj|ddS ru   rw   rr   r#   r#   r$   rx   Y  s    zSst2Processor.get_dev_examplesc                 C   s   |  | tj|ddS ry   rw   rr   r#   r#   r$   r{   ]  s    zSst2Processor.get_test_examplesc                 C   s   ddgS r|   r#   r   r#   r#   r$   rH   a  s    zSst2Processor.get_labelsc           
   	   C   st   g }|dkrdnd}t |D ]R\}}|dkr.q| d| }|| }|dkrPdn|d }	|t||d|	d q|S )r   rz   r   r   r   Nr   r   )
r^   r   r   r   r   r<   r   rQ   rC   r2   r#   r#   r$   rp   e  s    zSst2Processor._create_examplesr   r#   r#   ra   r$   r   E  s   	r   c                       sP   e Zd ZdZ fddZdd Zdd Zdd	 Zd
d Zdd Z	dd Z
  ZS )StsbProcessorz0Processor for the STS-B data set (GLUE version).c                    s$   t  j|| ttdt d S rY   rZ   r]   ra   r#   r$   r\   v  s    zStsbProcessor.__init__c                 C   s>   t |d  |d  d|d  dt|d  S rc   rg   rj   r#   r#   r$   r'   z  s    
z*StsbProcessor.get_example_from_tensor_dictc                 C   s   |  | tj|ddS r   rw   rr   r#   r#   r$   rt     s    z StsbProcessor.get_train_examplesc                 C   s   |  | tj|ddS ru   rw   rr   r#   r#   r$   rx     s    zStsbProcessor.get_dev_examplesc                 C   s   |  | tj|ddS ry   rw   rr   r#   r#   r$   r{     s    zStsbProcessor.get_test_examplesc                 C   s   dgS )rd   Nr#   r   r#   r#   r$   rH     s    zStsbProcessor.get_labelsc           
   	   C   sp   g }t |D ]^\}}|dkrq| d|d  }|d }|d }|dkrLdn|d }	|t||||	d q|S )	r   r   r      r   rz   Nr   r   r   r   r#   r#   r$   rp     s    zStsbProcessor._create_examplesr   r#   r#   ra   r$   r   s  s   	r   c                       sP   e Zd ZdZ fddZdd Zdd Zdd	 Zd
d Zdd Z	dd Z
  ZS )QqpProcessorz.Processor for the QQP data set (GLUE version).c                    s$   t  j|| ttdt d S rY   rZ   r]   ra   r#   r$   r\     s    zQqpProcessor.__init__c                 C   s>   t |d  |d  d|d  dt|d  S )rd   re   Z	question1rf   Z	question2r2   rg   rj   r#   r#   r$   r'     s    
z)QqpProcessor.get_example_from_tensor_dictc                 C   s   |  | tj|ddS r   rw   rr   r#   r#   r$   rt     s    zQqpProcessor.get_train_examplesc                 C   s   |  | tj|ddS ru   rw   rr   r#   r#   r$   rx     s    zQqpProcessor.get_dev_examplesc                 C   s   |  | tj|ddS ry   rw   rr   r#   r#   r$   r{     s    zQqpProcessor.get_test_examplesc                 C   s   ddgS r|   r#   r   r#   r#   r$   rH     s    zQqpProcessor.get_labelsc              	   C   s   |dk}|rdnd}|rdnd}g }t |D ]x\}}|dkr>q,| d|d  }	z$|| }
|| }|rjdn|d	 }W n tk
r   Y q,Y nX |t|	|
||d
 q,|S )r   rz   r   r      r   r   r   NrG   r   )rL   
IndexErrorrP   r   )r^   r   r   r   Zq1_indexZq2_indexr   r<   r   rQ   rC   rD   r2   r#   r#   r$   rp     s     
zQqpProcessor._create_examplesr   r#   r#   ra   r$   r     s   	r   c                       sP   e Zd ZdZ fddZdd Zdd Zdd	 Zd
d Zdd Z	dd Z
  ZS )QnliProcessorz/Processor for the QNLI data set (GLUE version).c                    s$   t  j|| ttdt d S rY   rZ   r]   ra   r#   r$   r\     s    zQnliProcessor.__init__c                 C   s>   t |d  |d  d|d  dt|d  S )rd   re   Zquestionrf   r   r2   rg   rj   r#   r#   r$   r'     s    
z*QnliProcessor.get_example_from_tensor_dictc                 C   s   |  | tj|ddS r   rw   rr   r#   r#   r$   rt     s    z QnliProcessor.get_train_examplesc                 C   s   |  | tj|ddS ru   rw   rr   r#   r#   r$   rx     s    zQnliProcessor.get_dev_examplesc                 C   s   |  | tj|ddS ry   rw   rr   r#   r#   r$   r{     s    zQnliProcessor.get_test_examplesc                 C   s   ddgS rd   r   Znot_entailmentr#   r   r#   r#   r$   rH     s    zQnliProcessor.get_labelsc           
   	   C   sp   g }t |D ]^\}}|dkrq| d|d  }|d }|d }|dkrLdn|d }	|t||||	d q|S 	r   r   r   r   r   rz   Nr   r   r   r   r#   r#   r$   rp     s    zQnliProcessor._create_examplesr   r#   r#   ra   r$   r     s   	r   c                       sP   e Zd ZdZ fddZdd Zdd Zdd	 Zd
d Zdd Z	dd Z
  ZS )RteProcessorz.Processor for the RTE data set (GLUE version).c                    s$   t  j|| ttdt d S rY   rZ   r]   ra   r#   r$   r\     s    zRteProcessor.__init__c                 C   s>   t |d  |d  d|d  dt|d  S rc   rg   rj   r#   r#   r$   r'   
  s    
z)RteProcessor.get_example_from_tensor_dictc                 C   s   |  | tj|ddS r   rw   rr   r#   r#   r$   rt     s    zRteProcessor.get_train_examplesc                 C   s   |  | tj|ddS ru   rw   rr   r#   r#   r$   rx     s    zRteProcessor.get_dev_examplesc                 C   s   |  | tj|ddS ry   rw   rr   r#   r#   r$   r{     s    zRteProcessor.get_test_examplesc                 C   s   ddgS r   r#   r   r#   r#   r$   rH     s    zRteProcessor.get_labelsc           
   	   C   sp   g }t |D ]^\}}|dkrq| d|d  }|d }|d }|dkrLdn|d }	|t||||	d q|S r   r   r   r#   r#   r$   rp   #  s    zRteProcessor._create_examplesr   r#   r#   ra   r$   r     s   	r   c                       sP   e Zd ZdZ fddZdd Zdd Zdd	 Zd
d Zdd Z	dd Z
  ZS )WnliProcessorz/Processor for the WNLI data set (GLUE version).c                    s$   t  j|| ttdt d S rY   rZ   r]   ra   r#   r$   r\   4  s    zWnliProcessor.__init__c                 C   s>   t |d  |d  d|d  dt|d  S rc   rg   rj   r#   r#   r$   r'   8  s    
z*WnliProcessor.get_example_from_tensor_dictc                 C   s   |  | tj|ddS r   rw   rr   r#   r#   r$   rt   A  s    z WnliProcessor.get_train_examplesc                 C   s   |  | tj|ddS ru   rw   rr   r#   r#   r$   rx   E  s    zWnliProcessor.get_dev_examplesc                 C   s   |  | tj|ddS ry   rw   rr   r#   r#   r$   r{   I  s    zWnliProcessor.get_test_examplesc                 C   s   ddgS r|   r#   r   r#   r#   r$   rH   M  s    zWnliProcessor.get_labelsc           
   	   C   sp   g }t |D ]^\}}|dkrq| d|d  }|d }|d }|dkrLdn|d }	|t||||	d q|S r   r   r   r#   r#   r$   rp   Q  s    zWnliProcessor._create_examplesr   r#   r#   ra   r$   r   1  s   	r   r   )	colamnlimrpcsst-2r-   qqpqnlirtewnli)
r   r   zmnli-mmr   r   r-   r   r   r   r   r=   r>   )NNNN)NNNN).r   rm   r   Zdataclassesr   enumr   typingr   r   r   Ztokenization_utilsr   utilsr	   r
   r   r   r   Z
tensorflowr   Z
get_loggerrU   rI   r   rM   r%   ri   r   r   r!   r"   rT   rX   r   r   r   r   r   r   r   r   r   Zglue_tasks_num_labelsr;   rK   r#   r#   r#   r$   <module>   s   

    #"    7/./..4../