"""Auto Tokenizer class."""

import importlib
import json
import os
import warnings
from collections import OrderedDict
from typing import TYPE_CHECKING, Dict, Optional, Tuple, Union

from ...configuration_utils import PretrainedConfig
from ...dynamic_module_utils import get_class_from_dynamic_module, resolve_trust_remote_code
from ...modeling_gguf_pytorch_utils import load_gguf_checkpoint
from ...tokenization_utils import PreTrainedTokenizer
from ...tokenization_utils_base import TOKENIZER_CONFIG_FILE
from ...utils import (
    cached_file,
    extract_commit_hash,
    is_g2p_en_available,
    is_sentencepiece_available,
    is_tokenizers_available,
    logging,
)
from ..encoder_decoder import EncoderDecoderConfig
from .auto_factory import _LazyAutoMapping
from .configuration_auto import (
    CONFIG_MAPPING_NAMES,
    AutoConfig,
    config_class_to_model_type,
    model_type_to_module_name,
    replace_list_option_in_docstrings,
)


if is_tokenizers_available():
    from ...tokenization_utils_fast import PreTrainedTokenizerFast
else:
    PreTrainedTokenizerFast = None


logger = logging.get_logger(__name__)

if TYPE_CHECKING:
    # This significantly improves completion suggestion performance when
    # the transformers package is used with Microsoft's Pylance language server.
    TOKENIZER_MAPPING_NAMES: OrderedDict[str, Tuple[Optional[str], Optional[str]]] = OrderedDict()
else:
    TOKENIZER_MAPPING_NAMES = OrderedDict(
        [
            ("albert", ("AlbertTokenizer" if is_sentencepiece_available() else None, "AlbertTokenizerFast" if is_tokenizers_available() else None)),
            ("align", ("BertTokenizer", "BertTokenizerFast" if is_tokenizers_available() else None)),
            ("bark", ("BertTokenizer", "BertTokenizerFast" if is_tokenizers_available() else None)),
            ("bart", ("BartTokenizer", "BartTokenizerFast")),
            ("barthez", ("BarthezTokenizer" if is_sentencepiece_available() else None, "BarthezTokenizerFast" if is_tokenizers_available() else None)),
            ("bartpho", ("BartphoTokenizer", None)),
            ("bert", ("BertTokenizer", "BertTokenizerFast" if is_tokenizers_available() else None)),
            ("bert-generation", ("BertGenerationTokenizer" if is_sentencepiece_available() else None, None)),
            ("bert-japanese", ("BertJapaneseTokenizer", None)),
            ("bertweet", ("BertweetTokenizer", None)),
            ("big_bird", ("BigBirdTokenizer" if is_sentencepiece_available() else None, "BigBirdTokenizerFast" if is_tokenizers_available() else None)),
            ("bigbird_pegasus", ("PegasusTokenizer" if is_sentencepiece_available() else None, "PegasusTokenizerFast" if is_tokenizers_available() else None)),
            ("biogpt", ("BioGptTokenizer", None)),
            ("blenderbot", ("BlenderbotTokenizer", "BlenderbotTokenizerFast")),
            ("blenderbot-small", ("BlenderbotSmallTokenizer", None)),
            ("blip", ("BertTokenizer", "BertTokenizerFast" if is_tokenizers_available() else None)),
            ("blip-2", ("GPT2Tokenizer", "GPT2TokenizerFast" if is_tokenizers_available() else None)),
            ("bloom", (None, "BloomTokenizerFast" if is_tokenizers_available() else None)),
            ("bridgetower", ("RobertaTokenizer", "RobertaTokenizerFast" if is_tokenizers_available() else None)),
            ("bros", ("BertTokenizer", "BertTokenizerFast" if is_tokenizers_available() else None)),
            ("byt5", ("ByT5Tokenizer", None)),
            ("camembert", ("CamembertTokenizer" if is_sentencepiece_available() else None, "CamembertTokenizerFast" if is_tokenizers_available() else None)),
            ("canine", ("CanineTokenizer", None)),
            ("chameleon", ("LlamaTokenizer" if is_sentencepiece_available() else None, "LlamaTokenizerFast" if is_tokenizers_available() else None)),
            ("chinese_clip", ("BertTokenizer", "BertTokenizerFast" if is_tokenizers_available() else None)),
            ("clap", ("RobertaTokenizer", "RobertaTokenizerFast" if is_tokenizers_available() else None)),
            ("clip", ("CLIPTokenizer", "CLIPTokenizerFast" if is_tokenizers_available() else None)),
            ("clipseg", ("CLIPTokenizer", "CLIPTokenizerFast" if is_tokenizers_available() else None)),
            ("clvp", ("ClvpTokenizer", None)),
            ("code_llama", ("CodeLlamaTokenizer" if is_sentencepiece_available() else None, "CodeLlamaTokenizerFast" if is_tokenizers_available() else None)),
            ("codegen", ("CodeGenTokenizer", "CodeGenTokenizerFast" if is_tokenizers_available() else None)),
            ("cohere", (None, "CohereTokenizerFast" if is_tokenizers_available() else None)),
            ("convbert", ("ConvBertTokenizer", "ConvBertTokenizerFast" if is_tokenizers_available() else None)),
            ("cpm", ("CpmTokenizer" if is_sentencepiece_available() else None, "CpmTokenizerFast" if is_tokenizers_available() else None)),
            ("cpmant", ("CpmAntTokenizer", None)),
            ("ctrl", ("CTRLTokenizer", None)),
            ("data2vec-audio", ("Wav2Vec2CTCTokenizer", None)),
            ("data2vec-text", ("RobertaTokenizer", "RobertaTokenizerFast" if is_tokenizers_available() else None)),
            ("dbrx", ("GPT2Tokenizer", "GPT2TokenizerFast" if is_tokenizers_available() else None)),
            ("deberta", ("DebertaTokenizer", "DebertaTokenizerFast" if is_tokenizers_available() else None)),
            ("deberta-v2", ("DebertaV2Tokenizer" if is_sentencepiece_available() else None, "DebertaV2TokenizerFast" if is_tokenizers_available() else None)),
            ("distilbert", ("DistilBertTokenizer", "DistilBertTokenizerFast" if is_tokenizers_available() else None)),
            ("dpr", ("DPRQuestionEncoderTokenizer", "DPRQuestionEncoderTokenizerFast" if is_tokenizers_available() else None)),
            ("electra", ("ElectraTokenizer", "ElectraTokenizerFast" if is_tokenizers_available() else None)),
            ("ernie", ("BertTokenizer", "BertTokenizerFast" if is_tokenizers_available() else None)),
            ("ernie_m", ("ErnieMTokenizer" if is_sentencepiece_available() else None, None)),
            ("esm", ("EsmTokenizer", None)),
            ("falcon", (None, "PreTrainedTokenizerFast" if is_tokenizers_available() else None)),
            ("fastspeech2_conformer", ("FastSpeech2ConformerTokenizer" if is_g2p_en_available() else None, None)),
            ("flaubert", ("FlaubertTokenizer", None)),
            ("fnet", ("FNetTokenizer" if is_sentencepiece_available() else None, "FNetTokenizerFast" if is_tokenizers_available() else None)),
            ("fsmt", ("FSMTTokenizer", None)),
            ("funnel", ("FunnelTokenizer", "FunnelTokenizerFast" if is_tokenizers_available() else None)),
            ("gemma", ("GemmaTokenizer" if is_sentencepiece_available() else None, "GemmaTokenizerFast" if is_tokenizers_available() else None)),
            ("gemma2", ("GemmaTokenizer" if is_sentencepiece_available() else None, "GemmaTokenizerFast" if is_tokenizers_available() else None)),
            ("git", ("BertTokenizer", "BertTokenizerFast" if is_tokenizers_available() else None)),
            ("gpt-sw3", ("GPTSw3Tokenizer" if is_sentencepiece_available() else None, None)),
            ("gpt2", ("GPT2Tokenizer", "GPT2TokenizerFast" if is_tokenizers_available() else None)),
            ("gpt_bigcode", ("GPT2Tokenizer", "GPT2TokenizerFast" if is_tokenizers_available() else None)),
            ("gpt_neo", ("GPT2Tokenizer", "GPT2TokenizerFast" if is_tokenizers_available() else None)),
            ("gpt_neox", (None, "GPTNeoXTokenizerFast" if is_tokenizers_available() else None)),
            ("gpt_neox_japanese", ("GPTNeoXJapaneseTokenizer", None)),
            ("gptj", ("GPT2Tokenizer", "GPT2TokenizerFast" if is_tokenizers_available() else None)),
            ("gptsan-japanese", ("GPTSanJapaneseTokenizer", None)),
            ("grounding-dino", ("BertTokenizer", "BertTokenizerFast" if is_tokenizers_available() else None)),
            ("groupvit", ("CLIPTokenizer", "CLIPTokenizerFast" if is_tokenizers_available() else None)),
            ("herbert", ("HerbertTokenizer", "HerbertTokenizerFast" if is_tokenizers_available() else None)),
            ("hubert", ("Wav2Vec2CTCTokenizer", None)),
            ("ibert", ("RobertaTokenizer", "RobertaTokenizerFast" if is_tokenizers_available() else None)),
            ("idefics", (None, "LlamaTokenizerFast" if is_tokenizers_available() else None)),
            ("idefics2", ("LlamaTokenizer" if is_sentencepiece_available() else None, "LlamaTokenizerFast" if is_tokenizers_available() else None)),
            ("instructblip", ("GPT2Tokenizer", "GPT2TokenizerFast" if is_tokenizers_available() else None)),
            ("instructblipvideo", ("GPT2Tokenizer", "GPT2TokenizerFast" if is_tokenizers_available() else None)),
            ("jamba", ("LlamaTokenizer" if is_sentencepiece_available() else None, "LlamaTokenizerFast" if is_tokenizers_available() else None)),
            ("jetmoe", ("LlamaTokenizer" if is_sentencepiece_available() else None, "LlamaTokenizerFast" if is_tokenizers_available() else None)),
            ("jukebox", ("JukeboxTokenizer", None)),
            ("kosmos-2", ("XLMRobertaTokenizer" if is_sentencepiece_available() else None, "XLMRobertaTokenizerFast" if is_tokenizers_available() else None)),
            ("layoutlm", ("LayoutLMTokenizer", "LayoutLMTokenizerFast" if is_tokenizers_available() else None)),
            ("layoutlmv2", ("LayoutLMv2Tokenizer", "LayoutLMv2TokenizerFast" if is_tokenizers_available() else None)),
            ("layoutlmv3", ("LayoutLMv3Tokenizer", "LayoutLMv3TokenizerFast" if is_tokenizers_available() else None)),
            ("layoutxlm", ("LayoutXLMTokenizer", "LayoutXLMTokenizerFast" if is_tokenizers_available() else None)),
            ("led", ("LEDTokenizer", "LEDTokenizerFast" if is_tokenizers_available() else None)),
            ("lilt", ("LayoutLMv3Tokenizer", "LayoutLMv3TokenizerFast" if is_tokenizers_available() else None)),
            ("llama", ("LlamaTokenizer" if is_sentencepiece_available() else None, "LlamaTokenizerFast" if is_tokenizers_available() else None)),
            ("llava", ("LlamaTokenizer" if is_sentencepiece_available() else None, "LlamaTokenizerFast" if is_tokenizers_available() else None)),
            ("llava-next-video", ("LlamaTokenizer" if is_sentencepiece_available() else None, "LlamaTokenizerFast" if is_tokenizers_available() else None)),
            ("llava_next", ("LlamaTokenizer" if is_sentencepiece_available() else None, "LlamaTokenizerFast" if is_tokenizers_available() else None)),
            ("longformer", ("LongformerTokenizer", "LongformerTokenizerFast" if is_tokenizers_available() else None)),
            ("longt5", ("T5Tokenizer" if is_sentencepiece_available() else None, "T5TokenizerFast" if is_tokenizers_available() else None)),
            ("luke", ("LukeTokenizer", None)),
            ("lxmert", ("LxmertTokenizer", "LxmertTokenizerFast" if is_tokenizers_available() else None)),
            ("m2m_100", ("M2M100Tokenizer" if is_sentencepiece_available() else None, None)),
            ("mamba", (None, "GPTNeoXTokenizerFast" if is_tokenizers_available() else None)),
            ("mamba2", (None, "GPTNeoXTokenizerFast" if is_tokenizers_available() else None)),
            ("marian", ("MarianTokenizer" if is_sentencepiece_available() else None, None)),
            ("mbart", ("MBartTokenizer" if is_sentencepiece_available() else None, "MBartTokenizerFast" if is_tokenizers_available() else None)),
            ("mbart50", ("MBart50Tokenizer" if is_sentencepiece_available() else None, "MBart50TokenizerFast" if is_tokenizers_available() else None)),
            ("mega", ("RobertaTokenizer", "RobertaTokenizerFast" if is_tokenizers_available() else None)),
            ("megatron-bert", ("BertTokenizer", "BertTokenizerFast" if is_tokenizers_available() else None)),
            ("mgp-str", ("MgpstrTokenizer", None)),
            ("mistral", ("LlamaTokenizer" if is_sentencepiece_available() else None, "LlamaTokenizerFast" if is_tokenizers_available() else None)),
            ("mixtral", ("LlamaTokenizer" if is_sentencepiece_available() else None, "LlamaTokenizerFast" if is_tokenizers_available() else None)),
            ("mluke", ("MLukeTokenizer" if is_sentencepiece_available() else None, None)),
            ("mobilebert", ("MobileBertTokenizer", "MobileBertTokenizerFast" if is_tokenizers_available() else None)),
            ("mpnet", ("MPNetTokenizer", "MPNetTokenizerFast" if is_tokenizers_available() else None)),
            ("mpt", (None, "GPTNeoXTokenizerFast" if is_tokenizers_available() else None)),
            ("mra", ("RobertaTokenizer", "RobertaTokenizerFast" if is_tokenizers_available() else None)),
            ("mt5", ("MT5Tokenizer" if is_sentencepiece_available() else None, "MT5TokenizerFast" if is_tokenizers_available() else None)),
            ("musicgen", ("T5Tokenizer" if is_sentencepiece_available() else None, "T5TokenizerFast" if is_tokenizers_available() else None)),
            ("musicgen_melody", ("T5Tokenizer" if is_sentencepiece_available() else None, "T5TokenizerFast" if is_tokenizers_available() else None)),
            ("mvp", ("MvpTokenizer", "MvpTokenizerFast" if is_tokenizers_available() else None)),
            ("nezha", ("BertTokenizer", "BertTokenizerFast" if is_tokenizers_available() else None)),
            ("nllb", ("NllbTokenizer" if is_sentencepiece_available() else None, "NllbTokenizerFast" if is_tokenizers_available() else None)),
            ("nllb-moe", ("NllbTokenizer" if is_sentencepiece_available() else None, "NllbTokenizerFast" if is_tokenizers_available() else None)),
            ("nystromformer", ("AlbertTokenizer" if is_sentencepiece_available() else None, "AlbertTokenizerFast" if is_tokenizers_available() else None)),
            ("olmo", (None, "GPTNeoXTokenizerFast" if is_tokenizers_available() else None)),
            ("oneformer", ("CLIPTokenizer", "CLIPTokenizerFast" if is_tokenizers_available() else None)),
            ("openai-gpt", ("OpenAIGPTTokenizer", "OpenAIGPTTokenizerFast" if is_tokenizers_available() else None)),
            ("opt", ("GPT2Tokenizer", "GPT2TokenizerFast" if is_tokenizers_available() else None)),
            ("owlv2", ("CLIPTokenizer", "CLIPTokenizerFast" if is_tokenizers_available() else None)),
            ("owlvit", ("CLIPTokenizer", "CLIPTokenizerFast" if is_tokenizers_available() else None)),
            ("paligemma", ("LlamaTokenizer" if is_sentencepiece_available() else None, "LlamaTokenizerFast" if is_tokenizers_available() else None)),
            ("pegasus", ("PegasusTokenizer" if is_sentencepiece_available() else None, "PegasusTokenizerFast" if is_tokenizers_available() else None)),
            ("pegasus_x", ("PegasusTokenizer" if is_sentencepiece_available() else None, "PegasusTokenizerFast" if is_tokenizers_available() else None)),
            ("perceiver", ("PerceiverTokenizer", None)),
            ("persimmon", ("LlamaTokenizer" if is_sentencepiece_available() else None, "LlamaTokenizerFast" if is_tokenizers_available() else None)),
            ("phi", ("CodeGenTokenizer", "CodeGenTokenizerFast" if is_tokenizers_available() else None)),
            ("phi3", ("LlamaTokenizer" if is_sentencepiece_available() else None, "LlamaTokenizerFast" if is_tokenizers_available() else None)),
            ("phobert", ("PhobertTokenizer", None)),
            ("pix2struct", ("T5Tokenizer" if is_sentencepiece_available() else None, "T5TokenizerFast" if is_tokenizers_available() else None)),
            ("plbart", ("PLBartTokenizer" if is_sentencepiece_available() else None, None)),
            ("prophetnet", ("ProphetNetTokenizer", None)),
            ("qdqbert", ("BertTokenizer", "BertTokenizerFast" if is_tokenizers_available() else None)),
            ("qwen2", ("Qwen2Tokenizer", "Qwen2TokenizerFast" if is_tokenizers_available() else None)),
            ("qwen2_moe", ("Qwen2Tokenizer", "Qwen2TokenizerFast" if is_tokenizers_available() else None)),
            ("rag", ("RagTokenizer", None)),
            ("realm", ("RealmTokenizer", "RealmTokenizerFast" if is_tokenizers_available() else None)),
            ("recurrent_gemma", ("GemmaTokenizer" if is_sentencepiece_available() else None, "GemmaTokenizerFast" if is_tokenizers_available() else None)),
            ("reformer", ("ReformerTokenizer" if is_sentencepiece_available() else None, "ReformerTokenizerFast" if is_tokenizers_available() else None)),
            ("rembert", ("RemBertTokenizer" if is_sentencepiece_available() else None, "RemBertTokenizerFast" if is_tokenizers_available() else None)),
            ("retribert", ("RetriBertTokenizer", "RetriBertTokenizerFast" if is_tokenizers_available() else None)),
            ("roberta", ("RobertaTokenizer", "RobertaTokenizerFast" if is_tokenizers_available() else None)),
            ("roberta-prelayernorm", ("RobertaTokenizer", "RobertaTokenizerFast" if is_tokenizers_available() else None)),
            ("roc_bert", ("RoCBertTokenizer", None)),
            ("roformer", ("RoFormerTokenizer", "RoFormerTokenizerFast" if is_tokenizers_available() else None)),
            ("rwkv", (None, "GPTNeoXTokenizerFast" if is_tokenizers_available() else None)),
            ("seamless_m4t", ("SeamlessM4TTokenizer" if is_sentencepiece_available() else None, "SeamlessM4TTokenizerFast" if is_tokenizers_available() else None)),
            ("seamless_m4t_v2", ("SeamlessM4TTokenizer" if is_sentencepiece_available() else None, "SeamlessM4TTokenizerFast" if is_tokenizers_available() else None)),
            ("siglip", ("SiglipTokenizer" if is_sentencepiece_available() else None, None)),
            ("speech_to_text", ("Speech2TextTokenizer" if is_sentencepiece_available() else None, None)),
            ("speech_to_text_2", ("Speech2Text2Tokenizer", None)),
            ("speecht5", ("SpeechT5Tokenizer" if is_sentencepiece_available() else None, None)),
            ("splinter", ("SplinterTokenizer", "SplinterTokenizerFast")),
            ("squeezebert", ("SqueezeBertTokenizer", "SqueezeBertTokenizerFast" if is_tokenizers_available() else None)),
            ("stablelm", (None, "GPTNeoXTokenizerFast" if is_tokenizers_available() else None)),
            ("starcoder2", ("GPT2Tokenizer", "GPT2TokenizerFast" if is_tokenizers_available() else None)),
            ("switch_transformers", ("T5Tokenizer" if is_sentencepiece_available() else None, "T5TokenizerFast" if is_tokenizers_available() else None)),
            ("t5", ("T5Tokenizer" if is_sentencepiece_available() else None, "T5TokenizerFast" if is_tokenizers_available() else None)),
            ("tapas", ("TapasTokenizer", None)),
            ("tapex", ("TapexTokenizer", None)),
            ("transfo-xl", ("TransfoXLTokenizer", None)),
            ("tvp", ("BertTokenizer", "BertTokenizerFast" if is_tokenizers_available() else None)),
            ("udop", ("UdopTokenizer" if is_sentencepiece_available() else None, "UdopTokenizerFast" if is_tokenizers_available() else None)),
            ("umt5", ("T5Tokenizer" if is_sentencepiece_available() else None, "T5TokenizerFast" if is_tokenizers_available() else None)),
            ("video_llava", ("LlamaTokenizer" if is_sentencepiece_available() else None, "LlamaTokenizerFast" if is_tokenizers_available() else None)),
            ("vilt", ("BertTokenizer", "BertTokenizerFast" if is_tokenizers_available() else None)),
            ("vipllava", ("LlamaTokenizer" if is_sentencepiece_available() else None, "LlamaTokenizerFast" if is_tokenizers_available() else None)),
            ("visual_bert", ("BertTokenizer", "BertTokenizerFast" if is_tokenizers_available() else None)),
            ("vits", ("VitsTokenizer", None)),
            ("wav2vec2", ("Wav2Vec2CTCTokenizer", None)),
            ("wav2vec2-bert", ("Wav2Vec2CTCTokenizer", None)),
            ("wav2vec2-conformer", ("Wav2Vec2CTCTokenizer", None)),
            ("wav2vec2_phoneme", ("Wav2Vec2PhonemeCTCTokenizer", None)),
            ("whisper", ("WhisperTokenizer", "WhisperTokenizerFast" if is_tokenizers_available() else None)),
            ("xclip", ("CLIPTokenizer", "CLIPTokenizerFast" if is_tokenizers_available() else None)),
            ("xglm", ("XGLMTokenizer" if is_sentencepiece_available() else None, "XGLMTokenizerFast" if is_tokenizers_available() else None)),
            ("xlm", ("XLMTokenizer", None)),
            ("xlm-prophetnet", ("XLMProphetNetTokenizer" if is_sentencepiece_available() else None, None)),
            ("xlm-roberta", ("XLMRobertaTokenizer" if is_sentencepiece_available() else None, "XLMRobertaTokenizerFast" if is_tokenizers_available() else None)),
            ("xlm-roberta-xl", ("XLMRobertaTokenizer" if is_sentencepiece_available() else None, "XLMRobertaTokenizerFast" if is_tokenizers_available() else None)),
            ("xlnet", ("XLNetTokenizer" if is_sentencepiece_available() else None, "XLNetTokenizerFast" if is_tokenizers_available() else None)),
            ("xmod", ("XLMRobertaTokenizer" if is_sentencepiece_available() else None, "XLMRobertaTokenizerFast" if is_tokenizers_available() else None)),
            ("yoso", ("AlbertTokenizer" if is_sentencepiece_available() else None, "AlbertTokenizerFast" if is_tokenizers_available() else None)),
        ]
    )

TOKENIZER_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, TOKENIZER_MAPPING_NAMES)

CONFIG_TO_TYPE = {v: k for k, v in CONFIG_MAPPING_NAMES.items()}


def tokenizer_class_from_name(class_name: str):
    if class_name == "PreTrainedTokenizerFast":
        return PreTrainedTokenizerFast

    for module_name, tokenizers in TOKENIZER_MAPPING_NAMES.items():
        if class_name in tokenizers:
            module_name = model_type_to_module_name(module_name)

            module = importlib.import_module(f".{module_name}", "transformers.models")
            try:
                return getattr(module, class_name)
            except AttributeError:
                continue

    for config, tokenizers in TOKENIZER_MAPPING._extra_content.items():
        for tokenizer in tokenizers:
            if getattr(tokenizer, "__name__", None) == class_name:
                return tokenizer

    # We did not find the class, but maybe it's because a dep is missing. In that case, the class will be in the
    # main init and we return the proper dummy to get an appropriate error message.
    main_module = importlib.import_module("transformers")
    if hasattr(main_module, class_name):
        return getattr(main_module, class_name)

    return None


def get_tokenizer_config(
    pretrained_model_name_or_path: Union[str, os.PathLike],
    cache_dir: Optional[Union[str, os.PathLike]] = None,
    force_download: bool = False,
    resume_download: Optional[bool] = None,
    proxies: Optional[Dict[str, str]] = None,
    token: Optional[Union[bool, str]] = None,
    revision: Optional[str] = None,
    local_files_only: bool = False,
    subfolder: str = "",
    **kwargs,
):
    """
    Loads the tokenizer configuration for a pretrained model from its tokenizer configuration file.

    Args:
        pretrained_model_name_or_path (`str` or `os.PathLike`):
            This can be either:

            - a string, the *model id* of a pretrained model configuration hosted inside a model repo on
              huggingface.co.
            - a path to a *directory* containing a configuration file saved using the
              [`~PreTrainedTokenizer.save_pretrained`] method, e.g., `./my_model_directory/`.

        cache_dir (`str` or `os.PathLike`, *optional*):
            Path to a directory in which a downloaded pretrained model configuration should be cached if the standard
            cache should not be used.
        force_download (`bool`, *optional*, defaults to `False`):
            Whether or not to force to (re-)download the configuration files and override the cached versions if they
            exist.
        resume_download:
            Deprecated and ignored. All downloads are now resumed by default when possible.
            Will be removed in v5 of Transformers.
        proxies (`Dict[str, str]`, *optional*):
            A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
            'http://hostname': 'foo.bar:4012'}.` The proxies are used on each request.
        token (`str` or `bool`, *optional*):
            The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
            when running `huggingface-cli login` (stored in `~/.huggingface`).
        revision (`str`, *optional*, defaults to `"main"`):
            The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
            git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any
            identifier allowed by git.
        local_files_only (`bool`, *optional*, defaults to `False`):
            If `True`, will only try to load the tokenizer configuration from local files.
        subfolder (`str`, *optional*, defaults to `""`):
            In case the tokenizer config is located inside a subfolder of the model repo on huggingface.co, you can
            specify the folder name here.

    <Tip>

    Passing `token=True` is required when you want to use a private model.

    </Tip>

    Returns:
        `Dict`: The configuration of the tokenizer.

    Examples:

    ```python
    # Download configuration from huggingface.co and cache.
    tokenizer_config = get_tokenizer_config("google-bert/bert-base-uncased")
    # This model does not have a tokenizer config so the result will be an empty dict.
    tokenizer_config = get_tokenizer_config("FacebookAI/xlm-roberta-base")

    # Save a pretrained tokenizer locally and you can reload its config
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-cased")
    tokenizer.save_pretrained("tokenizer-test")
    tokenizer_config = get_tokenizer_config("tokenizer-test")
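
    # The returned dict mirrors the repo's tokenizer_config.json. As a sketch (assuming the tokenizer
    # saved just above), the recorded tokenizer class name can be read back like this:
    tokenizer_class_name = tokenizer_config.get("tokenizer_class")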
    ```"""
    use_auth_token = kwargs.pop("use_auth_token", None)
    if use_auth_token is not None:
        warnings.warn(
            "The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. "
            "Please use `token` instead.",
            FutureWarning,
        )
        if token is not None:
            raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
        token = use_auth_token

    commit_hash = kwargs.get("_commit_hash", None)
    resolved_config_file = cached_file(
        pretrained_model_name_or_path,
        TOKENIZER_CONFIG_FILE,
        cache_dir=cache_dir,
        force_download=force_download,
        resume_download=resume_download,
        proxies=proxies,
        token=token,
        revision=revision,
        local_files_only=local_files_only,
        subfolder=subfolder,
        _raise_exceptions_for_gated_repo=False,
        _raise_exceptions_for_missing_entries=False,
        _raise_exceptions_for_connection_errors=False,
        _commit_hash=commit_hash,
    )
    if resolved_config_file is None:
        logger.info("Could not locate the tokenizer configuration file, will try to use the model config instead.")
        return {}
    commit_hash = extract_commit_hash(resolved_config_file, commit_hash)

    with open(resolved_config_file, encoding="utf-8") as reader:
        result = json.load(reader)
    result["_commit_hash"] = commit_hash
    return result


class AutoTokenizer:
    r"""
    This is a generic tokenizer class that will be instantiated as one of the tokenizer classes of the library when
    created with the [`AutoTokenizer.from_pretrained`] class method.

    This class cannot be instantiated directly using `__init__()` (throws an error).
    """

    def __init__(self):
        raise EnvironmentError(
            "AutoTokenizer is designed to be instantiated "
            "using the `AutoTokenizer.from_pretrained(pretrained_model_name_or_path)` method."
        )

    @classmethod
    @replace_list_option_in_docstrings(TOKENIZER_MAPPING_NAMES)
    def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs):
        r"""
        Instantiate one of the tokenizer classes of the library from a pretrained model vocabulary.

        The tokenizer class to instantiate is selected based on the `model_type` property of the config object (either
        passed as an argument or loaded from `pretrained_model_name_or_path` if possible), or when it's missing, by
        falling back to using pattern matching on `pretrained_model_name_or_path`:

        List options

        Params:
            pretrained_model_name_or_path (`str` or `os.PathLike`):
                Can be either:

                    - A string, the *model id* of a predefined tokenizer hosted inside a model repo on huggingface.co.
                    - A path to a *directory* containing vocabulary files required by the tokenizer, for instance saved
                      using the [`~PreTrainedTokenizer.save_pretrained`] method, e.g., `./my_model_directory/`.
                    - A path or url to a single saved vocabulary file if and only if the tokenizer only requires a
                      single vocabulary file (like Bert or XLNet), e.g.: `./my_model_directory/vocab.txt`. (Not
                      applicable to all derived classes)
            inputs (additional positional arguments, *optional*):
                Will be passed along to the Tokenizer `__init__()` method.
            config ([`PretrainedConfig`], *optional*):
                The configuration object used to determine the tokenizer class to instantiate.
            cache_dir (`str` or `os.PathLike`, *optional*):
                Path to a directory in which a downloaded pretrained model configuration should be cached if the
                standard cache should not be used.
            force_download (`bool`, *optional*, defaults to `False`):
                Whether or not to force a (re-)download of the model weights and configuration files, overriding the
                cached versions if they exist.
            resume_download:
                Deprecated and ignored. All downloads are now resumed by default when possible.
                Will be removed in v5 of Transformers.
            proxies (`Dict[str, str]`, *optional*):
                A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
                'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
            revision (`str`, *optional*, defaults to `"main"`):
                The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
                git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any
                identifier allowed by git.
            subfolder (`str`, *optional*):
                In case the relevant files are located inside a subfolder of the model repo on huggingface.co (e.g. for
                facebook/rag-token-base), specify it here.
            use_fast (`bool`, *optional*, defaults to `True`):
                Use a [fast Rust-based tokenizer](https://huggingface.co/docs/tokenizers/index) if it is supported for
                a given model. If a fast tokenizer is not available for a given model, a normal Python-based tokenizer
                is returned instead.
            tokenizer_type (`str`, *optional*):
                Model type whose tokenizer to load, bypassing the config-based lookup; must be one of the keys of
                the tokenizer mapping (for instance `"bert"` or `"gpt2"`).
            trust_remote_code (`bool`, *optional*, defaults to `False`):
                Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
                should only be set to `True` for repositories you trust and in which you have read the code, as it will
                execute code present on the Hub on your local machine.
            kwargs (additional keyword arguments, *optional*):
                Will be passed to the Tokenizer `__init__()` method. Can be used to set special tokens like
                `bos_token`, `eos_token`, `unk_token`, `sep_token`, `pad_token`, `cls_token`, `mask_token`,
                `additional_special_tokens`. See parameters in the `__init__()` for more details.

        Examples:

        ```python
        >>> from transformers import AutoTokenizer

        >>> # Download vocabulary from huggingface.co and cache.
        >>> tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")

        >>> # Download vocabulary from huggingface.co (user-uploaded) and cache.
        >>> tokenizer = AutoTokenizer.from_pretrained("dbmdz/bert-base-german-cased")

        >>> # If vocabulary files are in a directory (e.g. tokenizer was saved using *save_pretrained('./test/saved_model/')*)
        >>> # tokenizer = AutoTokenizer.from_pretrained("./test/bert_saved_model/")

        >>> # Download vocabulary from huggingface.co and define model-specific arguments
        >>> tokenizer = AutoTokenizer.from_pretrained("FacebookAI/roberta-base", add_prefix_space=True)
        ```"""
        use_auth_token = kwargs.pop("use_auth_token", None)
        if use_auth_token is not None:
            warnings.warn(
                "The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. "
                "Please use `token` instead.",
                FutureWarning,
            )
            if kwargs.get("token", None) is not None:
                raise ValueError(
                    "`token` and `use_auth_token` are both specified. Please set only the argument `token`."
                )
            kwargs["token"] = use_auth_token

        config = kwargs.pop("config", None)
        kwargs["_from_auto"] = True

        use_fast = kwargs.pop("use_fast", True)
        tokenizer_type = kwargs.pop("tokenizer_type", None)
        trust_remote_code = kwargs.pop("trust_remote_code", None)
        gguf_file = kwargs.get("gguf_file", None)

        # First, let's see whether the tokenizer_type is passed so that we can leverage it.
        if tokenizer_type is not None:
            tokenizer_class = None
            tokenizer_class_tuple = TOKENIZER_MAPPING_NAMES.get(tokenizer_type, None)

            if tokenizer_class_tuple is None:
                raise ValueError(
                    f"Passed `tokenizer_type` {tokenizer_type} does not exist. `tokenizer_type` should be one of "
                    f"{', '.join(c for c in TOKENIZER_MAPPING_NAMES.keys())}."
                )

            tokenizer_class_name, tokenizer_fast_class_name = tokenizer_class_tuple

            if use_fast:
                if tokenizer_fast_class_name is not None:
                    tokenizer_class = tokenizer_class_from_name(tokenizer_fast_class_name)
                else:
                    logger.warning(
                        "`use_fast` is set to `True` but the tokenizer class does not have a fast version. "
                        "Falling back to the slow version."
                    )
            if tokenizer_class is None:
                tokenizer_class = tokenizer_class_from_name(tokenizer_class_name)

            if tokenizer_class is None:
                raise ValueError(f"Tokenizer class {tokenizer_class_name} is not currently imported.")

            return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)

        # Next, let's try to use the tokenizer_config file to get the tokenizer class.
        tokenizer_config = get_tokenizer_config(pretrained_model_name_or_path, **kwargs)
        if "_commit_hash" in tokenizer_config:
            kwargs["_commit_hash"] = tokenizer_config["_commit_hash"]
        config_tokenizer_class = tokenizer_config.get("tokenizer_class")
        tokenizer_auto_map = None
        if "auto_map" in tokenizer_config:
            if isinstance(tokenizer_config["auto_map"], (tuple, list)):
                # Legacy format for dynamic tokenizers
                tokenizer_auto_map = tokenizer_config["auto_map"]
            else:
                tokenizer_auto_map = tokenizer_config["auto_map"].get("AutoTokenizer", None)

        # If that did not work, let's try to use the config.
        if config_tokenizer_class is None:
            if not isinstance(config, PretrainedConfig):
                if gguf_file:
                    gguf_path = cached_file(pretrained_model_name_or_path, gguf_file, **kwargs)
                    config_dict = load_gguf_checkpoint(gguf_path, return_tensors=False)["config"]
                    config = AutoConfig.for_model(**config_dict)
                else:
                    config = AutoConfig.from_pretrained(
                        pretrained_model_name_or_path, trust_remote_code=trust_remote_code, **kwargs
                    )
            config_tokenizer_class = config.tokenizer_class
            if hasattr(config, "auto_map") and "AutoTokenizer" in config.auto_map:
                tokenizer_auto_map = config.auto_map["AutoTokenizer"]

        has_remote_code = tokenizer_auto_map is not None
        has_local_code = type(config) in TOKENIZER_MAPPING or (
            config_tokenizer_class is not None
            and (
                tokenizer_class_from_name(config_tokenizer_class) is not None
                or tokenizer_class_from_name(config_tokenizer_class + "Fast") is not None
            )
        )
        trust_remote_code = resolve_trust_remote_code(
            trust_remote_code, pretrained_model_name_or_path, has_local_code, has_remote_code
        )

        if has_remote_code and trust_remote_code:
            if use_fast and tokenizer_auto_map[1] is not None:
                class_ref = tokenizer_auto_map[1]
            else:
                class_ref = tokenizer_auto_map[0]
            tokenizer_class = get_class_from_dynamic_module(class_ref, pretrained_model_name_or_path, **kwargs)
            _ = kwargs.pop("code_revision", None)
            if os.path.isdir(pretrained_model_name_or_path):
                tokenizer_class.register_for_auto_class()
            return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
        elif config_tokenizer_class is not None:
            tokenizer_class = None
            if use_fast and not config_tokenizer_class.endswith("Fast"):
                tokenizer_class_candidate = f"{config_tokenizer_class}Fast"
                tokenizer_class = tokenizer_class_from_name(tokenizer_class_candidate)
            if tokenizer_class is None:
                tokenizer_class_candidate = config_tokenizer_class
                tokenizer_class = tokenizer_class_from_name(tokenizer_class_candidate)
            if tokenizer_class is None:
                raise ValueError(
                    f"Tokenizer class {tokenizer_class_candidate} does not exist or is not currently imported."
                )
            return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)

        # Otherwise we have to be creative.
        # If the model is an encoder/decoder, use the encoder's tokenizer class.
        if isinstance(config, EncoderDecoderConfig):
            if type(config.decoder) is not type(config.encoder):  # noqa
                logger.warning(
                    f"The encoder model config class: {config.encoder.__class__} is different from the decoder model "
                    f"config class: {config.decoder.__class__}. It is not recommended to use the "
                    "`AutoTokenizer.from_pretrained()` method in this case. Please use the encoder and decoder "
                    "specific tokenizer classes."
                )
            config = config.encoder

        model_type = config_class_to_model_type(type(config).__name__)
        if model_type is not None:
            tokenizer_class_py, tokenizer_class_fast = TOKENIZER_MAPPING[type(config)]

            if tokenizer_class_fast and (use_fast or tokenizer_class_py is None):
                return tokenizer_class_fast.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
            else:
                if tokenizer_class_py is not None:
                    return tokenizer_class_py.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
                else:
                    raise ValueError(
                        "This tokenizer cannot be instantiated. Please make sure you have `sentencepiece` installed "
                        "in order to use this tokenizer."
                    )

        raise ValueError(
            f"Unrecognized configuration class {config.__class__} to build an AutoTokenizer.\n"
            f"Model type should be one of {', '.join(c.__name__ for c in TOKENIZER_MAPPING.keys())}."
        )

    @staticmethod
    def register(config_class, slow_tokenizer_class=None, fast_tokenizer_class=None, exist_ok=False):
        """
        Register a new tokenizer in this mapping.


        Args:
            config_class ([`PretrainedConfig`]):
                The configuration corresponding to the model to register.
            slow_tokenizer_class ([`PreTrainedTokenizer`], *optional*):
                The slow tokenizer to register.
            fast_tokenizer_class ([`PreTrainedTokenizerFast`], *optional*):
                The fast tokenizer to register.
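
        Example (a minimal sketch: `CustomConfig`, `CustomTokenizer`, and `CustomTokenizerFast` are hypothetical
        user-defined classes, assumed to subclass `PretrainedConfig`, `PreTrainedTokenizer`, and
        `PreTrainedTokenizerFast` respectively):

        ```python
        >>> from transformers import AutoConfig, AutoTokenizer

        >>> AutoConfig.register("custom-model", CustomConfig)
        >>> AutoTokenizer.register(
        ...     CustomConfig, slow_tokenizer_class=CustomTokenizer, fast_tokenizer_class=CustomTokenizerFast
        ... )
        >>> # `AutoTokenizer.from_pretrained` now resolves checkpoints whose config is a `CustomConfig`
        >>> # to the registered tokenizer classes.
        ```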
        """
        if slow_tokenizer_class is None and fast_tokenizer_class is None:
            raise ValueError("You need to pass either a `slow_tokenizer_class` or a `fast_tokenizer_class`")
        if slow_tokenizer_class is not None and issubclass(slow_tokenizer_class, PreTrainedTokenizerFast):
            raise ValueError("You passed a fast tokenizer in the `slow_tokenizer_class`.")
        if fast_tokenizer_class is not None and issubclass(fast_tokenizer_class, PreTrainedTokenizer):
            raise ValueError("You passed a slow tokenizer in the `fast_tokenizer_class`.")

        if (
            slow_tokenizer_class is not None
            and fast_tokenizer_class is not None
            and issubclass(fast_tokenizer_class, PreTrainedTokenizerFast)
            and fast_tokenizer_class.slow_tokenizer_class != slow_tokenizer_class
        ):
            raise ValueError(
                "The fast tokenizer class you are passing has a `slow_tokenizer_class` attribute that is not "
                "consistent with the slow tokenizer class you passed (fast tokenizer has "
                f"{fast_tokenizer_class.slow_tokenizer_class} and you passed {slow_tokenizer_class}). Fix one of "
                "those so they match!"
            )

        # Avoid resetting a set slow/fast tokenizer if we are passing just the other ones.
        if config_class in TOKENIZER_MAPPING._extra_content:
            existing_slow, existing_fast = TOKENIZER_MAPPING[config_class]
            if slow_tokenizer_class is None:
                slow_tokenizer_class = existing_slow
            if fast_tokenizer_class is None:
                fast_tokenizer_class = existing_fast

        TOKENIZER_MAPPING.register(config_class, (slow_tokenizer_class, fast_tokenizer_class), exist_ok=exist_ok)