U
    4Af                    @   s  d dl Z d dlZd dlZd dlZd dlmZmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZmZmZmZmZmZmZmZ d dlZdd	lmZ dd
lmZ ddlmZm Z m!Z!m"Z" ddl#m$Z$ e"%e&Z'e
( j)Z*ee+j,j,a-t-j,j,a.eG dd dZ/ddddddddddddZ0e1e2dddZ3e1e4ddd Z5e1ee1 d!d"d#Z6de1e4e1d$d%d&Z7de1e1eee1ef  eee1ef  e2e1d(d)d*Z8dee1ej9f e1eee1ef  eee1ef  e2d+d,d-Z:e1e/e/ee1e1f d.d/d0Z;d1d2 Z<ee1ej9f e1d3d4d5Z=d6d7d8d9d:d;d<Z>ej?d=ej@d>ZAd?d@ ZBdee1ej9f e/e/ee1 e2ee1 dBdCdDZCdeee1ej9f  eee1  eee1ej9f  dEdFdGZDde1eee1  ee1eeee f f dHdIdJZEej?dKej@d>ZFde1eee1eeee f f  e1dLdMdNZGdOdP ZHe?dQZIde1eee1  ee1ee1 f dHdRdSZJdeee1  dTdUdVZKdee1ej9f eee1  e2dWdXdYZLde/e/eee1  e2dZd[d\ZMe/e/d]d^d_ZNd`dadbgdcgddgdegdfgdggdhgdiZOe/e/ee1ee1 f djdkdlZPdmZQdee1ej9f e/e/eee1ej9f  eee1  dndodpZRdqdr ZSde1e/e2eee1  ee1 dsdtduZTedvdwdxZUG dydz dze$ZVde1ee1 ee ee ee1 ed{d|d}ZWe1e2d~ddZXdd ZYdS )    N)ArgumentParser	Namespace)	dataclass)date)chain)Path)AnyCallableDictListOptionalPatternTupleUnion   )auto)model_type_to_module_name)is_flax_availableis_tf_availableis_torch_availablelogging   )BaseTransformersCLICommandc                   @   s   e Zd ZU dZeed< eed< dZee ed< dZee ed< dZ	ee ed< dZ
ee ed< dZee ed	< dZee ed
< dZee ed< dZee ed< dZee ed< dd ZdS )ModelPatternsa  
    Holds the basic information about a new model for the add-new-model-like command.

    Args:
        model_name (`str`): The model name.
        checkpoint (`str`): The checkpoint to use for doc examples.
        model_type (`str`, *optional*):
            The model type, the identifier used internally in the library like `bert` or `xlm-roberta`. Will default to
            `model_name` lowercased with spaces replaced with minuses (-).
        model_lower_cased (`str`, *optional*):
            The lowercased version of the model name, to use for the module name or function names. Will default to
            `model_name` lowercased with spaces and minuses replaced with underscores.
        model_camel_cased (`str`, *optional*):
            The camel-cased version of the model name, to use for the class names. Will default to `model_name`
            camel-cased (with spaces and minuses both considered as word separators.
        model_upper_cased (`str`, *optional*):
            The uppercased version of the model name, to use for the constant names. Will default to `model_name`
            uppercased with spaces and minuses replaced with underscores.
        config_class (`str`, *optional*):
            The tokenizer class associated with this model. Will default to `"{model_camel_cased}Config"`.
        tokenizer_class (`str`, *optional*):
            The tokenizer class associated with this model (leave to `None` for models that don't use a tokenizer).
        image_processor_class (`str`, *optional*):
            The image processor class associated with this model (leave to `None` for models that don't use an image
            processor).
        feature_extractor_class (`str`, *optional*):
            The feature extractor class associated with this model (leave to `None` for models that don't use a feature
            extractor).
        processor_class (`str`, *optional*):
            The processor class associated with this model (leave to `None` for models that don't use a processor).
    
model_name
checkpointN
model_typemodel_lower_casedmodel_camel_casedmodel_upper_casedconfig_classtokenizer_classimage_processor_classfeature_extractor_classprocessor_classc                 C   s   | j d kr| j dd| _ | jd krD| j dddd| _| jd kr| jd}ttdd |D  }dd |D }d	|| _| j
d kr| j dddd| _
| jd kr| j d| _d S )	N -_c                 S   s   g | ]}| d qS )r&   )split.0w r,   L/tmp/pip-unpacked-wheel-zw5xktn0/transformers/commands/add_new_model_like.py
<listcomp>`   s     z/ModelPatterns.__post_init__.<locals>.<listcomp>c                 S   s$   g | ]}|d    |dd  qS )r   r   N)upperr)   r,   r,   r-   r.   b   s      Config)r   r   lowerreplacer   r   r(   listr   joinr   r/   r    )selfwordsr,   r,   r-   __post_init__X   s    




zModelPatterns.__post_init__)__name__
__module____qualname____doc__str__annotations__r   r   r   r   r   r    r!   r"   r#   r$   r8   r,   r,   r,   r-   r   *   s   
 r   z[CONFIG_CLASS]z[TOKENIZER_CLASS]z[IMAGE_PROCESSOR_CLASS]z[FEATURE_EXTRACTOR_CLASS]z[PROCESSOR_CLASS]z[CHECKPOINT]z[MODEL_TYPE]z[MODEL_UPPER_CASED]z[MODEL_CAMELCASED]z[MODEL_LOWER_CASED]z[MODEL_NAME])r    r!   r"   r#   r$   r   r   r   r   r   r   )linereturnc                 C   s   t | dkp|  S )z4
    Determines whether a line is empty or not.
    r   )lenisspacer?   r,   r,   r-   is_empty_liney   s    rD   c                 C   s(   t d| }|dkrdS t| d S )z@
    Returns the number of spaces that start a line indent.
    z^(\s*)(?:\S|$)Nr   )researchrA   groups)r?   rF   r,   r,   r-   find_indent   s    rH   )contentr@   c                 C   s   g }g }|  d}ddddg}|D ]}t|dk}|rRt|dkrR|d d }t|st|dkr|r||kr|| |d| g }q|d| |g}q"|| q"t|dkr|d| |S )	z
    Parse the content of a module in the list of objects it defines.

    Args:
        content (`str`): The content to parse

    Returns:
        `List[str]`: The list of objects defined in the module.
    
)]}"""r   r   z# Copied from)r(   rA   
startswithrD   rH   appendr5   )rI   objectscurrent_objectlinesend_markersr?   is_valid_objectr,   r,   r-   parse_module_content   s&    


rV   )rI   indent_levelr@   c                 C   s   g }|  d}ddddg}t|D ]\}}|dkrh|dkrht|sht||krhtd| dt| d	t||k rt|s qt|dk}t|s|d
st||kr|r| |kr|| d	|  S || q"t|dkrd	|S dS )aE  Return the first block in `content` with the indent level `indent_level`.

    The first line in `content` should be indented at `indent_level` level, otherwise an error will be thrown.

    This method will immediately stop the search when a (non-empty) line with indent level less than `indent_level` is
    encountered.

    Args:
        content (`str`): The content to parse
        indent_level (`int`, *optional*, default to 0): The indent level of the blocks to search for

    Returns:
        `str`: The first block in `content` with the indent level `indent_level`.
    rJ   rK   rL   rM   rN   r   zNWhen `indent_level > 0`, the first line in `content` should have indent level z. Got z	 instead.:N)
r(   	enumeraterD   rH   
ValueErrorrA   endswithlstriprP   r5   )rI   rW   rR   rS   rT   idxr?   rU   r,   r,   r-   extract_block   s2    
$

r^   F)textrI   	add_after
add_beforeexact_matchr@   c                    s   |dkr|dkrt d|dk	r0|dk	r0t d|dkr<|n| fdd}g }| dD ]F}||r|dk	rz|| || |dk	r|| q\|| q\d|S )a  
    A utility to add some content inside a given text.

    Args:
       text (`str`): The text in which we want to insert some content.
       content (`str`): The content to add.
       add_after (`str` or `Pattern`):
           The pattern to test on a line of `text`, the new content is added after the first instance matching it.
       add_before (`str` or `Pattern`):
           The pattern to test on a line of `text`, the new content is added before the first instance matching it.
       exact_match (`bool`, *optional*, defaults to `False`):
           A line is considered a match with `add_after` or `add_before` if it matches exactly when `exact_match=True`,
           otherwise, if `add_after`/`add_before` is present in the line.

    <Tip warning={true}>

    The arguments `add_after` and `add_before` are mutually exclusive, and one exactly needs to be provided.

    </Tip>

    Returns:
        `str`: The text with the new content added if a match was found.
    Nz3You need to pass either `add_after` or `add_before`z/You can't pass both `add_after` or `add_before`c                    s0   t tr| d k	S  r$| kS | kS d S N)
isinstancer   rF   rC   rb   patternr,   r-   this_is_the_line  s
    
z-add_content_to_text.<locals>.this_is_the_linerJ   )rZ   r(   rP   r5   )r_   rI   r`   ra   rb   rg   	new_linesr?   r,   re   r-   add_content_to_text   s     

ri   )	file_namerI   r`   ra   rb   c              	   C   s\   t | ddd}| }W 5 Q R X t|||||d}t | ddd}|| W 5 Q R X dS )a  
    A utility to add some content inside a given file.

    Args:
       file_name (`str` or `os.PathLike`): The name of the file in which we want to insert some content.
       content (`str`): The content to add.
       add_after (`str` or `Pattern`):
           The pattern to test on a line of `text`, the new content is added after the first instance matching it.
       add_before (`str` or `Pattern`):
           The pattern to test on a line of `text`, the new content is added before the first instance matching it.
       exact_match (`bool`, *optional*, defaults to `False`):
           A line is considered a match with `add_after` or `add_before` if it matches exactly when `exact_match=True`,
           otherwise, if `add_after`/`add_before` is present in the line.

    <Tip warning={true}>

    The arguments `add_after` and `add_before` are mutually exclusive, and one exactly needs to be provided.

    </Tip>
    rutf-8encoding)r`   ra   rb   r+   N)openreadri   write)rj   rI   r`   ra   rb   fZold_contentnew_contentr,   r,   r-   add_content_to_file  s        rt   )r_   old_model_patternsnew_model_patternsr@   c           	      C   s  dg}dD ]*}t ||dk	r
t ||dk	r
|| q
|j|j|jfkrR|d |j|jkrj|d ntd|j dd| } |j|jkr|j}t	| d	| dk	rt| d
d| } n
|d |
dddg |D ]}| t ||t| } qg }t D ]@\}}|| kr|t ||t ||f | |t ||} qdd |D }tt|t|krt| dfS t|}dd |D }| d|fS )az  
    Replace all patterns present in a given text.

    Args:
        text (`str`): The text to treat.
        old_model_patterns (`ModelPatterns`): The patterns for the old model.
        new_model_patterns (`ModelPatterns`): The patterns for the new model.

    Returns:
        `Tuple(str, str)`: A tuple of with the treated text and the replacement actually done in it.
    r    )r!   r"   r#   r$   Nr   r   z(\s*)model_type = ""z\1model_type = "[MODEL_TYPE]"z_[A-Z_]*[^A-Z_]z([A-Z_]*)([^a-zA-Z_])z[MODEL_UPPER_CASED]\1\2r   r   r   r   c                 S   s   g | ]\}}|qS r,   r,   r*   oldnewr,   r,   r-   r.   }  s     z*replace_model_patterns.<locals>.<listcomp>r0   c                 S   s   g | ]\}}| d | qS )z->r,   rx   r,   r,   r-   r.     s     ,)getattrrP   r   r   r   rE   subr   r   rF   extendr3   ATTRIBUTE_TO_PLACEHOLDERitemsrA   setsimplify_replacementsr5   )	r_   ru   rv   Zattributes_to_checkattrZold_model_valuereplacementsplaceholderZold_replacement_valuesr,   r,   r-   replace_model_patternsD  s@    


r   c                 C   s   t | dkr| S | jdd d d}|t | k r| | \}}|d }|t | k r| | \}}||||krx| | qD|d7 }qD|d7 }q$| S )a  
    Simplify a list of replacement patterns to make sure there are no needless ones.

    For instance in the sequence "Bert->BertNew, BertConfig->BertNewConfig, bert->bert_new", the replacement
    "BertConfig->BertNewConfig" is implied by "Bert->BertNew" so not needed.

    Args:
        replacements (`List[Tuple[str, str]]`): List of patterns (old, new)

    Returns:
        `List[Tuple[str, str]]`: The list of patterns simplified.
    r   c                 S   s   t | d S Nr   )rA   xr,   r,   r-   <lambda>      z'simplify_replacements.<locals>.<lambda>keyr   )rA   sortr3   pop)r   r]   ry   rz   jZold_2Znew_2r,   r,   r-   r     s    

r   )module_filer@   c                 C   sj   t |  }|dj}t|d }|dkrB|| dkrB|d8 }q$|dk rXt|  dd||d S )zA
    Returns the module name corresponding to a module file.
    r0   r   r   transformersz is not a transformers module..N)r   absolutewith_suffixpartsrA   rZ   r5   )r   Zfull_module_pathmodule_partsr]   r,   r,   r-   get_module_from_file  s    
r   r   r    r!   r"   r#   r$   )z_CHECKPOINT_FOR_DOC =z_CONFIG_FOR_DOC =z_TOKENIZER_FOR_DOC =z_IMAGE_PROCESSOR_FOR_DOC =z_FEAT_EXTRACTOR_FOR_DOC =z_PROCESSOR_FOR_DOC =z(^(?:class|def)\s+([^\s:\(]+)\s*(?:\(|\:)flagsc           
      C   s.  |  tj}d}t|D ]D\}}| | dr>|} q^q| d| dr|} q^q|dkrj| S || }t|}td||d |}t	| d}t
|D ]}d||| < qt
|d ddD ]H}|| }| ds| d	rt||krd||< q qqtjd
d |D }	|	S )zRemove `target_attr` in `obj`.Nz = zdef (rJ   r   #@c                 S   s   g | ]}|d k	r|qS rc   r,   )r*   r   r,   r,   r-   r.     s      z%remove_attributes.<locals>.<listcomp>)r(   oslineseprY   r\   rO   rH   r^   r5   rA   range)
objtarget_attrrS   Z
target_idxr]   r?   rW   parsedZ	num_linesZnew_objr,   r,   r-   remove_attributes  s0    ,
r   T)r   ru   rv   	dest_fileadd_copied_fromattrs_to_removec              	   C   s  |dkrt | |j|j}t| ddd}| }W 5 Q R X tddt d|}t|}g }	|D ]}
d}t	
 D ]:\}}||
krt|
t||t||}
|	|
 d	} qqt|rqd|
}t|
||\}
}tjd
|
tjddk	}|rD|sDt|
dk	rDt|dkrDt| }t| d }t|
d| d| d| td}
tdd|
}
|	|
 qdd|	}|dk	r|D ]}t||d}qvt|ddd}|| W 5 Q R X dS )a\  
    Create a new module from an existing one and adapting all function and classes names from old patterns to new ones.

    Args:
        module_file (`str` or `os.PathLike`): Path to the module to duplicate.
        old_model_patterns (`ModelPatterns`): The patterns for the old model.
        new_model_patterns (`ModelPatterns`): The patterns for the new model.
        dest_file (`str` or `os.PathLike`, *optional*): Path to the new module.
        add_copied_from (`bool`, *optional*, defaults to `True`):
            Whether or not to add `# Copied from` statements in the duplicated module.
    Nrk   rl   rm   z# Copyright (\d+)\sz# Copyright r%   FTz^#\s+Copied fromr   r   z# Copied from r   z with )ra   z
[ ]+# Copied from [^
]*
rJ   )r   r+   )r=   r3   r   ro   rp   rE   r}   CURRENT_YEARrV   SPECIAL_PATTERNSr   r|   rP   r   rF   	MULTILINE_re_class_funcrA   r   rG   ri   r5   r   rq   )r   ru   rv   r   r   r   rr   rI   rQ   Znew_objectsr   Zspecial_patternrf   r   Zold_objreplacementZhas_copied_frommodule_nameZold_object_namer,   r,   r-   duplicate_module  sN     
*  

r   )files
frameworksr@   c                    s   |dkrt  }i  g }| D ]T}t|jd}d|krB|| qd|krT| d< qd|krf| d< q| d< q fdd|D | S )	aW  
    Filter a list of files to only keep the ones corresponding to a list of frameworks.

    Args:
        files (`List[Union[str, os.PathLike]]`): The list of files to filter.
        frameworks (`List[str]`, *optional*): The list of allowed frameworks.

    Returns:
        `List[Union[str, os.PathLike]]`: The list of filtered files.
    Nr'   modelingtfflaxptc                    s   g | ]}| kr | qS r,   r,   r*   rr   Zframework_to_filer,   r-   r.   V  s      z*filter_framework_files.<locals>.<listcomp>)get_default_frameworksr   namer(   rP   )r   r   Zothersrr   r   r,   r   r-   filter_framework_files8  s    



r   )r   r   r@   c              	      s   t |  td   }t|d}t||d}td d d d |  d }d	  d
d  d
d  d
d  d
d  d
d  d
d  d
g}t||d} fdd|D }dd |D }|| |dS )a2  
    Retrieves all the files associated to a model.

    Args:
        model_type (`str`): A valid model type (like "bert" or "gpt2")
        frameworks (`List[str]`, *optional*):
            If passed, will only keep the model files corresponding to the passed frameworks.

    Returns:
        `Dict[str, Union[Path, List[Path]]]`: A dictionary with the following keys:
        - **doc_file** -- The documentation file for the model.
        - **model_files** -- All the files in the model module.
        - **test_files** -- The test files for the model.
    modelsz*.pyr   docssourceen	model_doc.mdZtest_modeling_z.pyZtest_modeling_tf_Ztest_modeling_flax_Ztest_tokenization_Ztest_image_processing_Ztest_feature_extraction_Ztest_processor_c                    s    g | ]}t d  d   | qS )testsr   )	REPO_PATHr   r   r,   r-   r.   |  s     z#get_model_files.<locals>.<listcomp>c                 S   s   g | ]}|  r|qS r,   )existsr   r,   r,   r-   r.   ~  s      )doc_filemodel_filesr   
test_files)r   TRANSFORMERS_PATHr4   globr   r   )r   r   Zmodel_moduler   r   r   r,   r   r-   get_model_filesY  s"    






	r   z$^_CHECKPOINT_FOR_DOC\s+=\s+(\S*)\s*$)r   r   r@   c                 C   s   |dkrt | }|d }|D ]}dt|kr.qt|dddZ}| }t|dk	rt| d }|dd	}|d
d	}|W  5 Q R    S W 5 Q R X qd	S )a  
    Finds the model checkpoint used in the docstrings for a given model.

    Args:
        model_type (`str`): A valid model type (like "bert" or "gpt2")
        model_files (`Dict[str, Union[Path, List[Path]]`, *optional*):
            The files associated to `model_type`. Can be passed to speed up the function, otherwise will be computed.

    Returns:
        `str`: The checkpoint used.
    Nr   r   rk   rl   rm   r   rw   r0   ')r   r=   ro   rp   _re_checkpoint_for_docrF   rG   r3   )r   r   Zmodule_filesfnamerr   rI   r   r,   r,   r-   find_base_model_checkpoint  s     r   c                  C   s8   g } t  r| d t r$| d t r4| d | S )zk
    Returns the list of frameworks (PyTorch, TensorFlow, Flax) that are installed in the environment.
    r   r   r   )r   rP   r   r   r   r,   r,   r-   r     s    


r   zMODEL_([A-Z_]*)MAPPING_NAMESc           	      C   s   |dkrt  }t rtjndt r(tjndt r6tjndd}i }|D ]}g }|| dkrjtd| ddd t	|| D }|D ](}t
|| |}| |kr|||   qt|dkrFtt|||< qF|S )a  
    Retrieve the model classes associated to a given model.

    Args:
        model_type (`str`): A valid model type (like "bert" or "gpt2")
        frameworks (`List[str]`, *optional*):
            The frameworks to look for. Will default to `["pt", "tf", "flax"]`, passing a smaller list will restrict
            the classes returned.

    Returns:
        `Dict[str, List[str]]`: A dictionary with one key per framework and the list of model classes associated to
        that framework as values.
    Nr   r   r   zYou selected z, in the frameworks, but it is not installed.c                 S   s   g | ]}t |d k	r|qS rc   )_re_model_mappingrF   )r*   r   r,   r,   r-   r.     s      z*retrieve_model_classes.<locals>.<listcomp>r   )r   r   auto_moduleZmodeling_autor   Zmodeling_tf_autor   Zmodeling_flax_autorZ   dirr|   rP   rA   r4   r   )	r   r   modulesmodel_classes	frameworkZnew_model_classesZmodel_mappingsZmodel_mapping_nameZmodel_mappingr,   r,   r-   retrieve_model_classes  s&    r   r   c                    sv  | t jkrt|  dt j|  }t jj|  }| t jjkrdt jj|  }|d dk	rZ|d n|d }nd}t jj	| d}t j
j	| d}t jj	| d}t| |d}	|dd}
g  |	d D ]J}d	t|kr؈ d
 qdt|kr d qdt|kr d q|dkrt } fdd|D }t| |d}|
 }t|t| |	d| |
|	d ||||||d}|||	|dS )a  
    Retrieves all the information from a given model_type.

    Args:
        model_type (`str`): A valid model type (like "bert" or "gpt2")
        frameworks (`List[str]`, *optional*):
            If passed, will only keep the info corresponding to the passed frameworks.

    Returns:
        `Dict`: A dictionary with the following keys:
        - **frameworks** (`List[str]`): The list of frameworks that back this model type.
        - **model_classes** (`Dict[str, List[str]]`): The model classes implemented for that model type.
        - **model_files** (`Dict[str, Union[Path, List[Path]]]`): The files associated with that model type.
        - **model_patterns** (`ModelPatterns`): The various patterns for the model.
     is not a valid model type.r   Nr   r   r1   r0   r   Zmodeling_tfr   Zmodeling_flaxr   r   r   c                    s   g | ]}| kr|qS r,   r,   r   Zavailable_frameworksr,   r-   r.     s      z+retrieve_info_for_model.<locals>.<listcomp>)r   r   )
r   r   r   r   r   r    r!   r"   r#   r$   )r   r   r   model_patterns)r   MODEL_NAMES_MAPPINGrZ   configuration_autoZCONFIG_MAPPING_NAMESZtokenization_autoZTOKENIZER_MAPPING_NAMESZimage_processing_autoZIMAGE_PROCESSOR_MAPPING_NAMESgetZfeature_extraction_autoZFEATURE_EXTRACTOR_MAPPING_NAMESZprocessing_autoZPROCESSOR_MAPPING_NAMESr   r3   r=   rP   r   r   r/   r   r   )r   r   r   r    Ztokenizer_classesr!   r"   r#   r$   r   r   r   r   r   r   r,   r   r-   retrieve_info_for_model  sX    



r   )	init_filer   keep_processingc              	      s   dkrt   ddi fdddD }|s>|ddd	g t|d
krNdS d|}td| d}td}td}td| d}t| ddd}	|	 }
W 5 Q R X |
d}g }d
}|t|k rp|	|| dk	r~|	||d  dk	r~|
  |d7 }t|| s0|	|| dkr<|d7 }q|d7 }t|| }t|| |kspt|| rn|d7 }qPq|	|| dk	r|| }|D ]V}|d| dd}|d| dd}|d| dd}|d| dd}qt| d
kr|| |d7 }q|sNt	d|| dkrft	d|| dkrf|||  |d7 }q|d7 }qt| ddd}	|	d| W 5 Q R X dS ) ag  
    Removes all the import lines that don't belong to a given list of frameworks or concern tokenizers/feature
    extractors/image processors/processors in an init.

    Args:
        init_file (`str` or `os.PathLike`): The path to the init to treat.
        frameworks (`List[str]`, *optional*):
           If passed, this will remove all imports that are subject to a framework not in frameworks
        keep_processing (`bool`, *optional*, defaults to `True`):
            Whether or not to keep the preprocessing (tokenizer, feature extractor, image processor, processor) imports
            in the init.
    Nr   Ztorchc                    s    g | ]}| kr ||qS r,   )r   r   r   namesr,   r-   r.   =  s      z,clean_frameworks_in_init.<locals>.<listcomp>r   ZsentencepieceZ
tokenizersZvisionr   |z^\s*if not is_(z)_available\(\):\s*$z\s*try:z\s*else:zis_(z)_availablerk   rl   rm   rJ   r   z, is_Z
_availabler0   is_z_available, z_available,zB^\s*"(tokenization|processing|feature_extraction|image_processing)zG^\s*from .(tokenization|processing|feature_extraction|image_processing)r+   )r   r~   rA   r5   rE   compilero   rp   r(   rF   r   rD   rH   r3   striprP   rq   )r   r   r   Z	to_removeZremove_patternZre_conditional_importsZre_tryZre_elseZre_is_xxx_availablerr   rI   rS   rh   r]   indentr?   r   r,   r   r-   clean_frameworks_in_init*  sd    



," 



r   )ru   rv   r   with_processingc              	   C   s  t td ddd}| }W 5 Q R X |d}d}g }d}	|t|k rd}
t|| spt|| dkrpd}	nR||  d	rd
}	d}
n6||  drd}	d}
n||  drd}	d}
|
r|| 	 dkr|
||  |d7 }q|	dk	r$|dk	r$|	|kr$|
||  |d7 }q<td| j d|| dk	r||| g}t|| }|d7 }t|| |kr|
||  |d7 }qb|| 	 dkr|
||  |d7 }d|}|
| d}|s^| j| j| j| jg}dd |D }|D ]b}|d| dd}|d| dd}|d| dd}|d| d}||krd}q|r|
t|| |d  q<|
||  |d7 }q<t td ddd}|d| W 5 Q R X dS ) a  
    Add a model to the main init of Transformers.

    Args:
        old_model_patterns (`ModelPatterns`): The patterns for the old model.
        new_model_patterns (`ModelPatterns`): The patterns for the new model.
        frameworks (`List[str]`, *optional*):
            If specified, only the models implemented in those frameworks will be added.
        with_processsing (`bool`, *optional*, defaults to `True`):
            Whether the tokenizer/feature extractor/processor of the model should also be added to the init or not.
    __init__.pyrk   rl   rm   rJ   r   NFzif not is_torch_availabler   Tzif not is_tf_availabler   zif not is_flax_availabler   zelse:r   zmodels.z( |"))rK   rL   z],c                 S   s   g | ]}|d k	r|qS rc   r,   r*   cr,   r,   r-   r.     s      z*add_model_to_main_init.<locals>.<listcomp>z "z",r0   z, "rw   r%   r{   , r+   )ro   r   rp   r(   rA   rD   rH   r\   rO   r   rP   rE   rF   r   r5   r!   r"   r#   r$   r3   r   rq   )ru   rv   r   r   rr   rI   rS   r]   rh   r   Znew_frameworkblockr   Z	add_blockZprocessing_classesZprocessing_classr,   r,   r-   add_model_to_main_inity  sv    


 




r   )ru   rv   c           	   	   C   sn  | j dks|j dkrdS ttd d d ddd}| }W 5 Q R X |d}d	}|| d
sl|d7 }qT|d7 }|| ds|| dr|| }n4g }|| ds|||  |d7 }qd|}|d7 }d| j	 d|krt| j |krtqqt|
| j	|j	}|
| j |j }|d| |g ||d  }ttd d d ddd}|d| W 5 Q R X dS )z
    Add a tokenizer to the relevant mappings in the auto module.

    Args:
        old_model_patterns (`ModelPatterns`): The patterns for the old model.
        new_model_patterns (`ModelPatterns`): The patterns for the new model.
    Nr   r   ztokenization_auto.pyrk   rl   rm   rJ   r   z*    TOKENIZER_MAPPING_NAMES = OrderedDict(r   z$TOKENIZER_MAPPING = _LazyAutoMappingr{   z            ),rw   r+   )r!   ro   r   rp   r(   rO   r[   rP   r5   r   r3   rq   )	ru   rv   rr   rI   rS   r]   r   	new_blockrh   r,   r,   r-   insert_tokenizer_in_auto_module  s2    




r   z)        ("{model_type}", "{model_name}"),z+        ("{model_type}", "{config_class}"),z5        ("{model_type}", "{pretrained_archive_map}"),z6        ("{model_type}", "{feature_extractor_class}"),z4        ("{model_type}", "{image_processor_class}"),z+        ("{model_type}", "{any_pt_class}"),z+        ("{model_type}", "{any_tf_class}"),z-        ("{model_type}", "{any_flax_class}"),z.        ("{model_type}", "{processor_class}"),)zconfiguration_auto.pyzfeature_extraction_auto.pyzimage_processing_auto.pyzmodeling_auto.pyzmodeling_tf_auto.pyzmodeling_flax_auto.pyzprocessing_auto.py)ru   rv   r   c           	   	      s  t D ]}g }t | D ]tddk	rhtd d   |krf| fdd|  D  qdkr|d| j qdkr| jdk	r|jdk	r|d| j qdkr| j	dk	r|j	dk	r|d| j	 qd	kr"| j
dk	r,|j
dk	r,|d	| j
 q| q|D ]|td
 d | }}}dD ]:}|d| d t| |}|d| d t||}qR|| j|j}t|||d q2qt| | dS )aV  
    Add a model to the relevant mappings in the auto module.

    Args:
        old_model_patterns (`ModelPatterns`): The patterns for the old model.
        new_model_patterns (`ModelPatterns`): The patterns for the new model.
        model_classes (`Dict[str, List[str]]`): A dictionary framework to list of model classes implemented.
    zany_([a-z]*)_classNr   c                    s(   g | ] } d d  d d |qS ){Zany__classrM   )r3   )r*   clsr   rf   r,   r-   r.   #  s   z-add_model_to_auto_classes.<locals>.<listcomp>z{config_class}z{image_processor_class}z{feature_extractor_class}z{processor_class}r   r   )r   r   r   rM   )r`   )AUTO_CLASSES_PATTERNSrE   rF   rG   r~   rP   r3   r    r"   r#   r$   r   r|   r   rt   r   )	ru   rv   r   filenameZnew_patternsZ	full_nameZold_model_lineZnew_model_liner   r,   r   r-   add_model_to_auto_classes  s\    

 r   a  ## Overview

The {model_name} model was proposed in [<INSERT PAPER NAME HERE>](<INSERT PAPER LINK HERE>) by <INSERT AUTHORS HERE>.
<INSERT SHORT SUMMARY HERE>

The abstract from the paper is the following:

*<INSERT PAPER ABSTRACT HERE>*

Tips:

<INSERT TIPS ABOUT MODEL HERE>

This model was contributed by [INSERT YOUR HF USERNAME HERE](https://huggingface.co/<INSERT YOUR HF USERNAME HERE>).
The original code can be found [here](<INSERT LINK TO GITHUB REPO HERE>).

)r   ru   rv   r   r   c              	   C   s  t | ddd}| }W 5 Q R X tddt d|}|dkrFt }|dkrdt| j|j d }|	d	}g }g }	|D ]0}
|

d
r|d	|	 |
g}	qz|	|
 qz|d	|	 g }d}|D ]}|
d
s|| qtd|dk	r|d|j d	 q|s\|j|	d	d kr\d}|tj|jd t|||\}}|| q|rd}|	d	d }td| d }t|||\}}d|kr|j|jkr|| qd|kr|j|jkr|| qd|kr|j|jkr|| qd|kr&|j|jkr|| q|
drHd|kr|| q|
drjd|kr|| qt|	ddkrd|kr|| q|| qt |ddd}|d	| W 5 Q R X dS )a  
    Duplicate a documentation file and adapts it for a new model.

    Args:
        module_file (`str` or `os.PathLike`): Path to the doc file to duplicate.
        old_model_patterns (`ModelPatterns`): The patterns for the old model.
        new_model_patterns (`ModelPatterns`): The patterns for the new model.
        dest_file (`str` or `os.PathLike`, *optional*): Path to the new doc file.
            Will default to the a file named `{new_model_patterns.model_type}.md` in the same folder as `module_file`.
        frameworks (`List[str]`, *optional*):
            If passed, will only keep the model classes corresponding to this list of frameworks in the new doc file.
    rk   rl   rm   z<!--\s*Copyright (\d+)\sz<!--Copyright r%   Nr   rJ   r   Fz^#\s+\S+z# r   T)r   z^#+\s+(\S.*)$	TokenizerImageProcessorFeatureExtractor	ProcessorZFlaxr   ZTFr   r   r   r+   )ro   rp   rE   r}   r   r   r   parentr   r(   rO   rP   r5   rF   r   r    DOC_OVERVIEW_TEMPLATEformatr   rG   r!   r"   r#   r$   rA   rq   )r   ru   rv   r   r   rr   rI   rS   blocksZcurrent_blockr?   Z
new_blocksZ
in_classesr   r   r'   Zblock_titleZblock_classr,   r,   r-   duplicate_doc_filee  sr    










r  c              	   C   s  t d d d d }t|ddd}t|}W 5 Q R X d}|| d	 d
krV|d7 }q<|| d }d}|| d	 dkr|d7 }qf|| d }| j}	d}
|
t|k rdd ||
 d D }d|	 |krq|
d7 }
q|
t|kr| j}|j}td| d| d dS d|j |jd}||
 d | t	||
 d dd d||
 d< ||| d< ||| d< t|ddd}|
tj|dd W 5 Q R X dS )z
    Insert the new model in the doc TOC, in the same section as the old model.

    Args:
        old_model_patterns (`ModelPatterns`): The patterns for the old model.
        new_model_patterns (`ModelPatterns`): The patterns for the new model.
    r   r   r   z_toctree.ymlrk   utf8rm   r   titleZAPIr   sectionsZModelsc                 S   s   g | ]}|d  qS )localr,   )r*   entryr,   r,   r-   r.     s     z+insert_model_in_doc_toc.<locals>.<listcomp>z
model_doc/zDid not find z2 in the table of content, so you will need to add z
 manually.N)r  r  c                 S   s   | d   S )Nr  )r2   )sr,   r,   r-   r     r   z)insert_model_in_doc_toc.<locals>.<lambda>r   r+   rl   T)Zallow_unicode)r   ro   yamlZ	safe_loadr   rA   r   printrP   sortedrq   dump)ru   rv   Ztoc_filerr   rI   Zapi_idxZapi_docZ	model_idxr   old_model_typeZsection_idxr  Z	old_modelZ	new_model	toc_entryr,   r,   r-   insert_model_in_doc_toc  s<    


 r  r   rv   r   r   old_checkpointc              	   C   s  t | |d}|d }|d }|dk	r*||_t|jdkr@tdd}dD ]}	t||	t||	krHd	}qH|d
 }
|d }td |j }tj|dd |d }|rdd |D }tj|dd |D ]8}|j	
|j|j}|| }t|||||od|kd qt|d || d ttd d d|j dd| ddd t|||| d |d }|rddd |D }ttddd}d	}td d |j }tj|dd t|d d W 5 Q R X |D ]N}|j	
|j|j}|jj|j | }t||||d	dd gd! |||B }q|rtd" t|||
 td# d$ d% d& |j d' }t||||d t|| |j|jkrtd(|j d)|j d*|j d+ n0|j|jkrtd(|j d)|j d*|j d+ |j|jkr|j|jkrtd,|j d-|j d*|j d. |s|jdk	rtd/ dS )0a  
    Creates a new model module like a given model of the Transformers library.

    Args:
        model_type (`str`): The model type to duplicate (like "bert" or "gpt2")
        new_model_patterns (`ModelPatterns`): The patterns for the new model.
        add_copied_from (`bool`, *optional*, defaults to `True`):
            Whether or not to add "Copied from" statements to all classes in the new model modeling files.
        frameworks (`List[str]`, *optional*):
            If passed, will limit the duplicate to the frameworks specified.
        old_checkpoint (`str`, *optional*):
            The name of the base checkpoint for the old model. Should be passed along when it can't be automatically
            recovered from the `model_type`.
    r   r   r   Nr   zuThe old model checkpoint could not be recovered from the model type. Please pass it to the `old_checkpoint` argument.T)r"   r#   r$   r!   Fr   r   r   )exist_okc                 S   s@   g | ]8}d t |krdt |krdt |krdt |kr|qS )tokenization
processingfeature_extractionimage_processingr=   r   r,   r,   r-   r.   (  s   z)create_new_model_like.<locals>.<listcomp>r   )r   r   r   )r   r   z    r{   )r`   rb   )r   r   r   c                 S   s@   g | ]8}d t |krdt |krdt |krdt |kr|qS )r  	processorr  r  r  r   r,   r,   r-   r.   Q  s   )r   r@   c              	   S   sR   t | }| }W 5 Q R X tdd|}t | d}|| W 5 Q R X ||kS )Nzfx_compatible\s*=\s*Truezfx_compatible = Falser+   )ro   rp   rE   r}   rq   )r   fprI   rs   r,   r,   r-   disable_fx_testZ  s    
z.create_new_model_like.<locals>.disable_fx_testr   r+   Zpipeline_model_mappingZis_pipeline_test_to_skip)r   r   r   z}The tests for symbolic tracing with torch.fx were disabled, you can add those once symbolic tracing works for your new model.r   r   r   r   r   zSThe model you picked has the same name for the model type and the checkpoint name (zW). As a result, it's possible some places where the new checkpoint should be, you have z1 instead. You should search for all instances of zB in the new files and check they're not badly used as checkpoints.zYThe model you picked has the same name for the model type and the lowercased model name (zW). As a result, it's possible some places where the new model type should be, you have zE in the new files and check they're not badly used as the model type.zThe constants at the start of the new tokenizer file created needs to be manually fixed. If your new model has a tokenizer fast, you will also need to manually add the converter in the `SLOW_TO_FAST_CONVERTERS` constant of `convert_slow_tokenizer.py`.)r   r   rA   rZ   r|   r   r   r   makedirsr   r3   r   r   rt   r   r   boolr   ro   r   r	  r   r   r  r  r!   )r   rv   r   r   r  Z
model_infor   ru   Zkeep_old_processingZprocessing_attrr   Zold_module_nameZmodule_folderZfiles_to_adaptr   Znew_module_namer   r  Zdisabled_fx_testZtests_folderZ	test_fileZnew_test_file_namer   r,   r,   r-   create_new_model_like  s    	 
  

   	
  


r  argsc                 C   s   t | j| jdS )N)config_filepath_to_repo)AddNewModelLikeCommandr  r   r  r,   r,   r-   "add_new_model_like_command_factory  s    r"  c                   @   s0   e Zd ZeedddZd	ddZdd ZdS )
r!  )parserc                 C   s:   |  d}|jdtdd |jdtdd |jtd d S )Nzadd-new-model-likez--config_filez8A file with all the information for this model creation.)typehelpz--path_to_repozFWhen not using an editable install, the path to the Transformers repo.)func)
add_parseradd_argumentr=   set_defaultsr"  )r#  Zadd_new_model_like_parserr,   r,   r-   register_subcommand  s    
    z*AddNewModelLikeCommand.register_subcommandNc              	   G   s   |d k	rtt |ddd}t|}W 5 Q R X |d | _tf |d | _|dd| _|dt | _	|d	d | _
nt \| _| _| _| _	| _
|| _d S )
Nrk   rl   rm   r  rv   r   Tr   r  )ro   jsonloadr  r   r   r   r   r   r   r  get_user_inputr   )r6   r  r   r  rr   configr,   r,   r-   __init__  s     
zAddNewModelLikeCommand.__init__c                 C   s@   | j d k	r t| j atd d at| j| j| j| j| j	d d S )Nsrcr   r  )
r   r   r   r   r  r  r   r   r   r  )r6   r,   r,   r-   run  s    

zAddNewModelLikeCommand.run)NN)r9   r:   r;   staticmethodr   r*  r/  r1  r,   r,   r,   r-   r!    s   

r!  )questiondefault_valueis_valid_answer
convert_tofallback_messager@   c                 C   s   |  ds| d } |dk	r*|  d| d} d}|st| }|dk	rRt|dkrR|}|dk	rd||}n8|dk	rz||}d}W q tk
r   d}Y qX nd}|s.t| q.|S )a  
    A utility function that asks a question to the user to get an answer, potentially looping until it gets a valid
    answer.

    Args:
        question (`str`): The question to ask the user.
        default_value (`str`, *optional*): A potential default value that will be used when the answer is empty.
        is_valid_answer (`Callable`, *optional*):
            If set, the question will be asked until this function returns `True` on the provided answer.
        convert_to (`Callable`, *optional*):
            If set, the answer will be passed to this function. If this function raises an error on the procided
            answer, the question will be asked again.
        fallback_message (`str`, *optional*):
            A message that will be displayed each time the question is asked again to the user.

    Returns:
        `Any`: The answer provided by the user (or the default), passed through the potential conversion function.
    r%   Nz [z] Fr   T)r[   inputrA   	Exceptionr	  )r3  r4  r5  r6  r7  Zvalid_answerZanswerr,   r,   r-   get_user_field  s*    


r:  )r   r@   c                 C   s2   |   dkrdS |   dkr dS t|  ddS )z&
    Converts a string to a bool.
    )1yyestrueT)0nnofalseFz0 is not a value that can be converted to a bool.N)r2   rZ   r   r,   r,   r-   convert_to_bool  s
    rC  c                  C   s  t tjj } d}|s|td}|| kr.d}qt| d t|| }t	|dkrt	|dkrjd
|}td| d qt|}|d	 j}|d	 j}|d	 j}|d	 j}|d
 }	d}
t	|d	 jdkrtd}
td}t||}td|jd}td|jd}td|jd}td|jd}td| dd}td}dd ||||fD }d
|}td| d| dtdd}|r|}|}|}|}n|dk	rtd| d d}nd}|dk	rtd!| d"d}nd}|dk	rtd#| d$d}nd}|dk	rtd%| d&d}nd}t|||||||||||d'}td(td)d*d+}td,| d|	 dtd)d*d+}|rjd}n"td-d.d/ d0}t t|d1}|||||
fS )2zE
    Ask the user for the necessary inputs to add the new model.
    FziWhat is the model you would like to duplicate? Please provide the lowercase `model_type` (e.g. roberta): Tr   r   z or zDid you mean ?r   r   Nr   zVWe couldn't find the name of the base checkpoint for that model, please enter it here.zZWhat is the name (with no special casing) for your new model in the paper (e.g. RoBERTa)? zJWhat identifier would you like to use for the `model_type` of this model? )r4  zQWhat lowercase name would you like to use for the module (folder) of this model? zdWhat prefix (camel-cased) would you like to use for the model classes of this model (e.g. Roberta)? zZWhat prefix (upper-cased) would you like to use for the constants relative to this model? z:What will be the name of the config class for this model? r1   zsPlease give a checkpoint identifier (on the model Hub) for this new model (e.g. facebook/FacebookAI/roberta-base): c                 S   s*   g | ]"}|d k	rt |ts|n|d qS r   )rd   tupler   r,   r,   r-   r.   ^  s   z"get_user_input.<locals>.<listcomp>r   z5Will your new model use the same processing class as z (z) (yes/no)? z.Please answer yes/no, y/n, true/false or 1/0. )r6  r7  z=What will be the name of the tokenizer class for this model? r   zCWhat will be the name of the image processor class for this model? r   zEWhat will be the name of the feature extractor class for this model? r   z=What will be the name of the processor class for this model? r   )	r   r   r   r   r    r!   r"   r#   r$   zUShould we add # Copied from statements when creating the new modeling file (yes/no)? r=  z-Please answer yes/no, y/n, true/false or 1/0.)r6  r4  r7  zOShould we add a version of your new model in all the frameworks implemented by zNPlease enter the list of framworks you want (pt, tf, flax) separated by spacesc                 S   s   t dd | dD S )Nc                 s   s   | ]}|d kV  qdS )r   Nr,   )r*   pr,   r,   r-   	<genexpr>  s     z3get_user_input.<locals>.<lambda>.<locals>.<genexpr>r%   )allr(   r   r,   r,   r-   r     r   z get_user_input.<locals>.<lambda>)r5  r%   )r4   r   r   r   keysr8  r	  difflibget_close_matchesrA   r5   r   r!   r"   r#   r$   r   r:  r   r   r   r   r   rC  r   r(   )Zmodel_typesZvalid_model_typer  Znear_choicesZold_model_infoZold_tokenizer_classZold_image_processor_classZold_feature_extractor_classZold_processor_classZold_frameworksr  r   Zdefault_patternsr   r   r   r   r    r   Zold_processing_classesr   r"   r#   r$   r!   r   r   Zall_frameworksr   r,   r,   r-   r-    s    





 





r-  )r   )NNF)NNF)NTN)N)N)N)N)N)NT)NT)NN)TNN)NNNN)ZrJ  r+  r   rE   argparser   r   Zdataclassesr   datetimer   	itertoolsr   pathlibr   typingr   r	   r
   r   r   r   r   r   r  r   r   r   Zmodels.auto.configuration_autor   utilsr   r   r   r   r0   r   Z
get_loggerr9   loggertodayyearr   __file__r   r   r   r   r   r=   r  rD   intrH   rV   r^   ri   PathLikert   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r"  r!  r:  rC  r-  r,   r,   r,   r-   <module>   sV  (

@
(5   =   '  
B%
,   H  
!0*  "
()I    
 R  
Y1  B  
b5   
 24    4