U
    4AfC                  =   @   s  d Z ddlZddlZddlZddlZddlZddlmZ ddlm	Z	 ddl
mZmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZmZmZmZ ddlmZ ddlmZmZmZm Z  ddl!m"Z" ddl#m$Z$ ddl%m&Z& e'e(Z)eddddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4d5d6d7d8d9d:d;d<d=d>d?d@dAdBdCdDdEdFdGdHdIdJdKdLdMg<Z*eee*Z+e,dNdOdPZ-G dQdR dRZ.dS )SzAutoProcessor class.    N)OrderedDict   )PretrainedConfig)get_class_from_dynamic_moduleresolve_trust_remote_code)FeatureExtractionMixin)ImageProcessingMixin)ProcessorMixin)TOKENIZER_CONFIG_FILE)FEATURE_EXTRACTOR_NAMEPROCESSOR_NAMEget_file_from_repologging   )_LazyAutoMapping)CONFIG_MAPPING_NAMES
AutoConfigmodel_type_to_module_name!replace_list_option_in_docstrings)AutoFeatureExtractor)AutoImageProcessor)AutoTokenizer)ZalignZAlignProcessor)ZaltclipZAltCLIPProcessor)ZbarkZBarkProcessor)ZblipZBlipProcessor)zblip-2ZBlip2Processor)ZbridgetowerZBridgeTowerProcessor)Z	chameleonZChameleonProcessor)Zchinese_clipZChineseCLIPProcessor)ZclapZClapProcessor)ZclipCLIPProcessor)ZclipsegZCLIPSegProcessor)ZclvpZClvpProcessor)ZflavaZFlavaProcessor)ZfuyuZFuyuProcessor)gitZGitProcessor)zgrounding-dinoZGroundingDinoProcessor)Zgroupvitr   )ZhubertWav2Vec2Processor)ZideficsZIdeficsProcessor)Zidefics2ZIdefics2Processor)ZinstructblipZInstructBlipProcessor)ZinstructblipvideoZInstructBlipVideoProcessor)zkosmos-2ZKosmos2Processor)Z
layoutlmv2ZLayoutLMv2Processor)Z
layoutlmv3ZLayoutLMv3Processor)ZllavaLlavaProcessor)zllava-next-videoZLlavaNextVideoProcessor)Z
llava_nextZLlavaNextProcessor)ZmarkuplmZMarkupLMProcessor)ZmctctZMCTCTProcessor)zmgp-strZMgpstrProcessor)Z	oneformerZOneFormerProcessor)Zowlv2ZOwlv2Processor)ZowlvitZOwlViTProcessor)Z	paligemmaZPaliGemmaProcessor)Z
pix2structZPix2StructProcessor)Z	pop2pianoZPop2PianoProcessor)ZsamZSamProcessor)Zseamless_m4tZSeamlessM4TProcessor)Zsewr   )zsew-dr   )ZsiglipZSiglipProcessor)Zspeech_to_textZSpeech2TextProcessor)Zspeech_to_text_2ZSpeech2Text2Processor)Zspeecht5ZSpeechT5Processor)ZtrocrZTrOCRProcessor)ZtvltZTvltProcessor)ZtvpZTvpProcessor)Z	unispeechr   )zunispeech-satr   )Zvideo_llavaZVideoLlavaProcessor)ZviltZViltProcessor)Zvipllavar   )zvision-text-dual-encoderZVisionTextDualEncoderProcessor)Zwav2vec2r   )zwav2vec2-bertr   )zwav2vec2-conformerr   )Zwavlmr   )ZwhisperZWhisperProcessor)ZxclipZXCLIPProcessor)
class_namec              	   C   s   t  D ]T\}}| |krt|}td| d}zt|| W   S  tk
rZ   Y qY qX qtj	 D ]}t|dd | krh|  S qhtd}t
|| rt|| S d S )N.ztransformers.models__name__Ztransformers)PROCESSOR_MAPPING_NAMESitemsr   	importlibimport_modulegetattrAttributeErrorPROCESSOR_MAPPINGZ_extra_contentvalueshasattr)r   module_nameZ
processorsmodule	processorZmain_module r+   L/tmp/pip-unpacked-wheel-zw5xktn0/transformers/models/auto/processing_auto.pyprocessor_class_from_namer   s    



r-   c                   @   s:   e Zd ZdZdd Zeeedd Ze	d
ddZ
d	S )AutoProcessora  
    This is a generic processor class that will be instantiated as one of the processor classes of the library when
    created with the [`AutoProcessor.from_pretrained`] class method.

    This class cannot be instantiated directly using `__init__()` (throws an error).
    c                 C   s   t dd S )Nz}AutoProcessor is designed to be instantiated using the `AutoProcessor.from_pretrained(pretrained_model_name_or_path)` method.)EnvironmentError)selfr+   r+   r,   __init__   s    zAutoProcessor.__init__c                    s    dd}|dk	r@tdt  dddk	r8td| d<   dd}  dd}d d	< d}d} fd
dttj	
 D }t|tf|}	|	dk	rtj|f \}
}|
dd}d|
di kr|
d d }|dkrt|tf|}|dk	r4tj|f \}
}|
dd}d|
di kr4|
d d }|dk	r|dkrtj|f \}
}|
dd}d|
di kr|
d d }|dkrt|tf|}|dk	rt|dd}t|}
W 5 Q R X |
dd}d|
di kr|
d d }|dkrLt|tstj|fd|i }t|dd}t|drLd|jkrL|jd }|dk	r^t|}|dk	}|dk	pzt|t k}t!||||}|r|rt"||f }  dd}t#j$%|r|&  |j|fd|i S |dk	r|j|fd|i S t|t kr t t| j|f S zt'j|fd|i W S  t(k
r   zt)j|fd|i W  Y S  t(k
r|   Y nX zt*j|fd|i W  Y S  t(k
r   Y nX Y nX td| ddS )a  
        Instantiate one of the processor classes of the library from a pretrained model vocabulary.

        The processor class to instantiate is selected based on the `model_type` property of the config object (either
        passed as an argument or loaded from `pretrained_model_name_or_path` if possible):

        List options

        Params:
            pretrained_model_name_or_path (`str` or `os.PathLike`):
                This can be either:

                - a string, the *model id* of a pretrained feature_extractor hosted inside a model repo on
                  huggingface.co.
                - a path to a *directory* containing a processor files saved using the `save_pretrained()` method,
                  e.g., `./my_model_directory/`.
            cache_dir (`str` or `os.PathLike`, *optional*):
                Path to a directory in which a downloaded pretrained model feature extractor should be cached if the
                standard cache should not be used.
            force_download (`bool`, *optional*, defaults to `False`):
                Whether or not to force to (re-)download the feature extractor files and override the cached versions
                if they exist.
            resume_download:
                Deprecated and ignored. All downloads are now resumed by default when possible.
                Will be removed in v5 of Transformers.
            proxies (`Dict[str, str]`, *optional*):
                A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
                'http://hostname': 'foo.bar:4012'}.` The proxies are used on each request.
            token (`str` or *bool*, *optional*):
                The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
                when running `huggingface-cli login` (stored in `~/.huggingface`).
            revision (`str`, *optional*, defaults to `"main"`):
                The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
                git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any
                identifier allowed by git.
            return_unused_kwargs (`bool`, *optional*, defaults to `False`):
                If `False`, then this function returns just the final feature extractor object. If `True`, then this
                functions returns a `Tuple(feature_extractor, unused_kwargs)` where *unused_kwargs* is a dictionary
                consisting of the key/value pairs whose keys are not feature extractor attributes: i.e., the part of
                `kwargs` which has not been used to update `feature_extractor` and is otherwise ignored.
            trust_remote_code (`bool`, *optional*, defaults to `False`):
                Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
                should only be set to `True` for repositories you trust and in which you have read the code, as it will
                execute code present on the Hub on your local machine.
            kwargs (`Dict[str, Any]`, *optional*):
                The values in kwargs of any keys which are feature extractor attributes will be used to override the
                loaded values. Behavior concerning key/value pairs whose keys are *not* feature extractor attributes is
                controlled by the `return_unused_kwargs` keyword parameter.

        <Tip>

        Passing `token=True` is required when you want to use a private model.

        </Tip>

        Examples:

        ```python
        >>> from transformers import AutoProcessor

        >>> # Download processor from huggingface.co and cache.
        >>> processor = AutoProcessor.from_pretrained("facebook/wav2vec2-base-960h")

        >>> # If processor files are in a directory (e.g. processor was saved using *save_pretrained('./test/saved_model/')*)
        >>> # processor = AutoProcessor.from_pretrained("./test/saved_model/")
        ```use_auth_tokenNzrThe `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.tokenzV`token` and `use_auth_token` are both specified. Please set only the argument `token`.configtrust_remote_codeTZ
_from_autoc                    s   i | ]}| kr| | qS r+   r+   ).0keykwargsr+   r,   
<dictcomp>   s      z1AutoProcessor.from_pretrained.<locals>.<dictcomp>processor_classr.   auto_mapzutf-8)encodingZcode_revisionz!Unrecognized processing class in z. Can't instantiate a processor, a tokenizer, an image processor or a feature extractor for this model. Make sure the repository contains the files of at least one of those processing classes.)+popwarningswarnFutureWarningget
ValueErrorinspect	signaturer   
parameterskeysr   r	   Zget_processor_dictr   r   Zget_image_processor_dictr   Zget_feature_extractor_dictr
   openjsonload
isinstancer   r   from_pretrainedr#   r'   r<   r-   typer%   r   r   ospathisdirZregister_for_auto_classr   	Exceptionr   r   )clsZpretrained_model_name_or_pathr9   r2   r4   r5   r;   Zprocessor_auto_mapZget_file_from_repo_kwargsZprocessor_config_fileZconfig_dict_Zpreprocessor_config_fileZtokenizer_config_filereaderZhas_remote_codeZhas_local_coder+   r8   r,   rL      s"   E
 
 

 



    

zAutoProcessor.from_pretrainedFc                 C   s   t j| ||d dS )a  
        Register a new processor for this class.

        Args:
            config_class ([`PretrainedConfig`]):
                The configuration corresponding to the model to register.
            processor_class ([`FeatureExtractorMixin`]): The processor to register.
        )exist_okN)r%   register)Zconfig_classr;   rU   r+   r+   r,   rV   c  s    
zAutoProcessor.registerN)F)r   
__module____qualname____doc__r1   classmethodr   r   rL   staticmethodrV   r+   r+   r+   r,   r.      s    Jr.   )/rY   r!   rD   rI   rN   r?   collectionsr   Zconfiguration_utilsr   Zdynamic_module_utilsr   r   Zfeature_extraction_utilsr   Zimage_processing_utilsr   Zprocessing_utilsr	   Ztokenization_utilsr
   utilsr   r   r   r   Zauto_factoryr   Zconfiguration_autor   r   r   r   Zfeature_extraction_autor   Zimage_processing_autor   Ztokenization_autor   Z
get_loggerr   loggerr   r%   strr-   r.   r+   r+   r+   r,   <module>   s   
A
