
    i                        U d Z ddlZddlZddlZddlmZ ddlmZ ddlm	Z	 ddl
mZ ddlmZmZ dd	lmZ dd
lmZ ddlmZmZmZmZmZ ddlmZ ddlmZ ddlmZ ddlm Z m!Z!m"Z"m#Z#m$Z$  e       rddl%m&Z& ndZ& e       rddl'm(Z( ndZ( ejR                  e*      Z+i Z,e-e.e/e   f   e0d<   i Z1e-e.e/e   f   e0d<    ee.e.dz  f   g d e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd  e       rdndfd! e       rd"ndfd# e       rd$ndfd%d& e       rdndfd' e       rd(ndfd)d*d+ e       rd,ndfd- e       rd.ndfd/d0 e       rd1ndfd2d3 e       rdndfd4 e       rd5ndfd6d7 e       rdndfd8d9 e       rd:ndfd;d< e       rdndfd=d> e       rdndfd? e       rdndfd@dA e       rdBndfdC e       rd5ndfdD e       rdndfdE e       rdndfdF e       rdndfdG e       rdndfdH e       rdIndfdJdKdLdMdN e       rd5ndfdO e       rdPndfdQ e       rdRndfdSdT e       rdndfdU e       rdVndfdW e       rdndfdX e       rd5ndfdY e       rdndfdZd[ e       rd\ndfd] e       rd^ndfd_d` e       rdndfda e       rd5ndfdb e       rdcndfdd e       rdendfdfdg e       rdhndfdi e       rdjndfdk e       rdjndfdl e       rdjndfdm e       rdjndfdn e       rdjndfdo e       rdjndfdp e       rdndfdq e       rdrndfds e       rdrndfdt e       rdrndfdu e       rdrndfdv e       rdrndfdw e       rdrndfdx e       rdrndfdy e       rdrndfdz e       rdrndfd{ e       rd|ndfd} e       rd5ndfd~ e       rd5ndfd e       rd5ndfd e       rd\ndfdd e       rd5ndfddddd e       rdndfd e       rdndfd e       rdndfddd e       rdndfd e       rdndfd e       rd5ndfd e       rd5ndfd e       rdndfd e       rd5ndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rd"ndfd e       rd"ndfd e       rdndfdd e       rdndfd e       rdndfd e       rd\ndfd e       rd\ndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfdd e       rdndfd e       rdndfdd e	       rdn
 e       rdrndfd e	       rdn
 e       rdrndfd e	       rdn
 e       rdrndfd e	       rdn
 e       rdrndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rd\ndfdȑd e       rdndfd e       rdndfd e       rdndfd e       rdndfdΑd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rd\ndfd e       rd\ndfd e       rd5ndfd e       rd\ndfd e       rdndfd e       rdndfd e       rdndfd e       rd5ndfd e       rdndfd e       rdndfd e       rdndfd e       rd.ndfd e       rd.ndfdd e       rd5ndfdd e       rdndfd e	       rdn
 e       rdrndfd e       rdndfdd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfdd e       rdndfd e       rdjndfd e       rdndfd e       rdndfd  e       rdndfdddd e       rdndfd e       rd\ndfd e       rdndfd e       rdndfd	 e       rd
ndfd e       rd
ndfd e       rdjndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfdd e       rdndfd e       rd\ndfd e       rd5ndfd e       rdndfd e       rdndfd e       rdjndfdd e       rdndfd e       rdndfd e       rd ndfd! e       rdndfd"d#d$ e       rdndfd% e       rdndfd&d' e	       rdn
 e       rdrndfd(d)d*d+d, e       rd-ndfd. e       rdndfd/ e       rd0ndfd1d2 e       rdndfd3 e       rdndfd4 e       rd5ndfd6 e       rd\ndfd7 e       rdndfd8 e       rdndf      Z2 ee e2      Z3 e jh                         D  ci c]  \  } }|| 
 c}} Z5d9 Z6d: Z7d;e.d<e/e   dz  fd=Z8	 	 	 	 	 	 	 dJd>e.ejr                  e.   z  d?e.ejr                  e.   z  dz  d@e:dAe-e.e.f   dz  dBe:e.z  dz  dCe.dz  dDe:dEe.d<e-e.ef   fdFZ; G dG dH      Z<dIdHgZ=yc c}} w (K  zAuto Tokenizer class.    N)OrderedDict)Any)is_mistral_common_available   )PreTrainedConfig)get_class_from_dynamic_moduleresolve_trust_remote_code)load_gguf_checkpoint)TOKENIZER_CONFIG_FILE)extract_commit_hashis_g2p_en_availableis_sentencepiece_availableis_tokenizers_availablelogging)cached_file   )EncoderDecoderConfig   )_LazyAutoMapping)CONFIG_MAPPING_NAMES
AutoConfigconfig_class_to_model_typemodel_type_to_module_name!replace_list_option_in_docstrings)TokenizersBackend)SentencePieceBackendREGISTERED_TOKENIZER_CLASSESREGISTERED_FAST_ALIASESaimv2CLIPTokenizeralbertAlbertTokenizeralignBertTokenizeraudioflamingo3Qwen2Tokenizer
aya_visionCohereTokenizerbarkbartRobertaTokenizerbarthezBarthezTokenizer)bartphoBartphoTokenizerbertzbert-generationBertGenerationTokenizer)zbert-japaneseBertJapaneseTokenizer)bertweetBertweetTokenizerbig_birdBigBirdTokenizerbigbird_pegasusPegasusTokenizer)biogptBioGptTokenizer
blenderbotBlenderbotTokenizer)zblenderbot-smallBlenderbotSmallTokenizerblipzblip-2GPT2Tokenizer)bridgetowerr+   bros)byt5ByT5Tokenizer	camembertCamembertTokenizer)canineCanineTokenizerchinese_clip)clapr+   clipclipseg)clvpClvpTokenizer
code_llamaCodeLlamaTokenizercodegencoherecohere2colqwen2convbertcpmCpmTokenizer)cpmantCpmAntTokenizer)ctrlCTRLTokenizer)zdata2vec-audioWav2Vec2CTCTokenizer)zdata2vec-textr+   dbrxdebertaDebertaTokenizerz
deberta-v2DebertaV2Tokenizer)diaDiaTokenizer
distilbertdprDPRQuestionEncoderTokenizerelectraemu3ernie)esmEsmTokenizerfalcon_mambaGPTNeoXTokenizerfastspeech2_conformerFastSpeech2ConformerTokenizer)flaubertFlaubertTokenizerflava	flex_olmo	florence2BartTokenizerfnetFNetTokenizer)fsmtFSMTTokenizerfunnelFunnelTokenizergemmaGemmaTokenizergemma2gemma3gemma3_textgemma3ngemma3n_textgitglmr   glm4glm4_moeglm4_moe_liteglm4v	glm4v_moe	glm_imageglmasrgot_ocr2zgpt-sw3GPTSw3Tokenizergpt2gpt_bigcodegpt_neogpt_neox)gpt_neox_japaneseGPTNeoXJapaneseTokenizergptj)graniter?   )
granitemoer?   )granitemoehybridr?   )granitemoesharedr?   zgrounding-dinogroupvitherbertHerbertTokenizer)hubertr[   )ibertr+   ideficsLlamaTokenizeridefics2instructblipinstructblipvideointernvljais2zkosmos-2XLMRobertaTokenizerlasr_ctcParakeetTokenizerlasr_encoderlayoutlm
layoutlmv2LayoutLMv2Tokenizer
layoutlmv3LayoutLMv3Tokenizer	layoutxlmLayoutXLMTokenizerledLEDTokenizerlighton_ocrQwen2TokenizerFastlilt
longformerlongt5T5Tokenizer)lukeLukeTokenizerlxmertLxmertTokenizerm2m_100M2M100Tokenizermambamamba2marianMarianTokenizermarkuplmMarkupLMTokenizermbartMBartTokenizermbart50MBart50Tokenizer)megar+   zmegatron-bert
metaclip_2)zmgp-strMgpstrTokenizer
ministral3MistralCommonBackendmistralmistral3mixtralmlukeMLukeTokenizerzmm-grounding-dino
mobilebertMobileBertTokenizermpnetMPNetTokenizermpt)mrar+   mt5musicgenmusicgen_melodymvpMvpTokenizer)myt5MyT5TokenizernezhanllbNllbTokenizerznllb-moenougatNougatTokenizernystromformerolmoolmo2olmo3olmoezomdet-turbo	oneformerz
openai-gptOpenAIGPTTokenizeroptovis2owlv2owlvitpegasus	pegasus_x)	perceiverPerceiverTokenizerphi)phobertPhobertTokenizer
pix2structpixtralplbartPLBartTokenizer)
prophetnetProphetNetTokenizerqdqbertqwen2qwen2_5_omni
qwen2_5_vlqwen2_audio	qwen2_moeqwen2_vlqwen3	qwen3_moe
qwen3_nextqwen3_omni_moeqwen3_vlqwen3_vl_moe)ragRagTokenizerrealmrecurrent_gemmareformerReformerTokenizerrembertRemBertTokenizer	retribert)robertar+   )zroberta-prelayernormr+   )roc_bertRoCBertTokenizerroformerRoFormerTokenizerrwkvsam3
sam3_videoseamless_m4tSeamlessM4TTokenizerseamless_m4t_v2shieldgemma2siglipSiglipTokenizersiglip2Siglip2Tokenizerspeech_to_textSpeech2TextTokenizerspeecht5SpeechT5Tokenizer)splinterSplinterTokenizersqueezebertstablelm
starcoder2switch_transformerst5t5gemma)tapasTapasTokenizertrocrtvpudopUdopTokenizerumt5)	unispeechr[   )zunispeech-satr[   viltvisual_bert)vitsVitsTokenizervoxtral)wav2vec2r[   )zwav2vec2-bertr[   )zwav2vec2-conformerr[   )wav2vec2_phonemeWav2Vec2PhonemeCTCTokenizerwhisperWhisperTokenizerxclipxglmXGLMTokenizer)xlmXLMTokenizerzxlm-robertazxlm-roberta-xlxlnetXLNetTokenizerxlstmxmodyosoc                 t    t        | dd      5 }t        j                  |      cddd       S # 1 sw Y   yxY w)z*Loads a vocabulary file into a dictionary.rutf-8encodingN)openjsonload)
vocab_filereaders     t/home/obispo/Crisostomo_bridge/mision_env/lib/python3.12/site-packages/transformers/models/auto/tokenization_auto.py
load_vocabrS  K  s1    	j#	0 !Fyy ! ! !s   .7c                     g }t        | dd      5 }|D ]O  }|j                         }|s|j                  d      r(|j                  t	        |j                                      Q 	 ddd       |S # 1 sw Y   |S xY w)z Loads a merges file into a list.rI  rJ  rK  #N)rM  strip
startswithappendtuplesplit)merges_filemergesrQ  lines       rR  load_mergesr^  Q  sr    F	k3	1 3V 	3D::<DDOOC0eDJJL12	33
 M3
 Ms   A1A1*A11A;
class_namereturnc                 @   | dv rt         S | t        v r	t        |    S | t        v r	t        |    S | dk(  rt         S t        j	                         D ]]  \  }}|| k(  st        |      }|dv r| dk(  rt        j                  dd      }nt        j                  d| d      }	 t        ||       c S  t        j                  j                         D ]  }t        |d	d       | k(  s|c S  t        j                  d      }t        ||       rt        ||       S y # t        $ r Y w xY w)
N>   BloomTokenizerBloomTokenizerFastr   )r   r   r   	ministralr   r   r8  r   z.tokenization_mistral_commontransformers.ztransformers.models__name__)r   r   r   TOKENIZER_MAPPING_NAMESitemsr   	importlibimport_modulegetattrAttributeErrorTOKENIZER_MAPPING_extra_contentvalueshasattr)r_  module_nametokenizer_classmodule	tokenizermain_modules         rR  tokenizer_class_from_namerw  \  sC   ==  ,,&z2211+J77((   )@(E(E(G $_j(3K@Krr"88"001OQ_`"001[M1BDYZvz22 '55<<> 	9j$/:= )).9K{J'{J// " s   D	DDpretrained_model_name_or_path	cache_dirforce_downloadproxiestokenrevisionlocal_files_only	subfolderc                 "   |j                  d      }	t        | t        |||||||ddd|	      }
|
t        j	                  d       i S t        |
|	      }	t        |
d      5 }t        j                  |      }ddd       |	d<   |S # 1 sw Y   xY w)aY  
    Loads the tokenizer configuration from a pretrained model tokenizer configuration.

    Args:
        pretrained_model_name_or_path (`str` or `os.PathLike`):
            This can be either:

            - a string, the *model id* of a pretrained model configuration hosted inside a model repo on
              huggingface.co.
            - a path to a *directory* containing a configuration file saved using the
              [`~PreTrainedTokenizer.save_pretrained`] method, e.g., `./my_model_directory/`.

        cache_dir (`str` or `os.PathLike`, *optional*):
            Path to a directory in which a downloaded pretrained model configuration should be cached if the standard
            cache should not be used.
        force_download (`bool`, *optional*, defaults to `False`):
            Whether or not to force to (re-)download the configuration files and override the cached versions if they
            exist.
        proxies (`dict[str, str]`, *optional*):
            A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
            'http://hostname': 'foo.bar:4012'}.` The proxies are used on each request.
        token (`str` or *bool*, *optional*):
            The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
            when running `hf auth login` (stored in `~/.huggingface`).
        revision (`str`, *optional*, defaults to `"main"`):
            The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
            git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any
            identifier allowed by git.
        local_files_only (`bool`, *optional*, defaults to `False`):
            If `True`, will only try to load the tokenizer configuration from local files.
        subfolder (`str`, *optional*, defaults to `""`):
            In case the tokenizer config is located inside a subfolder of the model repo on huggingface.co, you can
            specify the folder name here.

    <Tip>

    Passing `token=True` is required when you want to use a private model.

    </Tip>

    Returns:
        `dict`: The configuration of the tokenizer.

    Examples:

    ```python
    # Download configuration from huggingface.co and cache.
    tokenizer_config = get_tokenizer_config("google-bert/bert-base-uncased")
    # This model does not have a tokenizer config so the result will be an empty dict.
    tokenizer_config = get_tokenizer_config("FacebookAI/xlm-roberta-base")

    # Save a pretrained tokenizer locally and you can reload its config
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-cased")
    tokenizer.save_pretrained("tokenizer-test")
    tokenizer_config = get_tokenizer_config("tokenizer-test")
    ```_commit_hashF)ry  rz  r{  r|  r}  r~  r   _raise_exceptions_for_gated_repo%_raise_exceptions_for_missing_entries'_raise_exceptions_for_connection_errorsr  Nz\Could not locate the tokenizer configuration file, will try to use the model config instead.rJ  rK  )	getr   r   loggerinfor   rM  rN  rO  )rx  ry  rz  r{  r|  r}  r~  r  kwargscommit_hashresolved_config_filerQ  results                rR  get_tokenizer_configr    s    J **^,K&%%))..305  #rs	%&:KHK	"W	5 #6"#(F>M# #s    BBc                   \    e Zd ZdZd Ze ee      dee	z  fd              Z
e	 dd       Zy)AutoTokenizera  
    This is a generic tokenizer class that will be instantiated as one of the tokenizer classes of the library when
    created with the [`AutoTokenizer.from_pretrained`] class method.

    This class cannot be instantiated directly using `__init__()` (throws an error).
    c                     t        d      )Nz}AutoTokenizer is designed to be instantiated using the `AutoTokenizer.from_pretrained(pretrained_model_name_or_path)` method.)OSError)selfs    rR  __init__zAutoTokenizer.__init__  s    _
 	
    r`  c           	         |j                  dd      }d|d<   |j                  dd      }|j                  dd      }|j                  dd      }|j                  d      }|vt        j                  |d      }	|	,t        d	| d
dj	                  d t        D               d      t        |	      }
|
t        d|	 d       |
j                  |g|i |S |r3t        ||fi |}t        |d      d   }t        j                  d'i |}n|	 t        j                  |fd|i|}|j                  }t        |fi |}|j                  dd      }d}d|v r4t        |d   t         t"        f      r|d   }n|d   j                  dd      }|\|Z|X|dk7  rSt        j                  |d      j%                  dd      |j%                  dd      k7  r	 t'        j                  |g|i |S d|v r|d   |d<   |r|j%                  dd      }|du}t)        |      t*        v xs% |duxr t        |      duxs t        |dz         du}|r:|d   |d   }n|d   }d|v r|j-                  d      d   }nd}t/        |||||      }|rI|rGt1        |fi |}
|j                  dd      }|
j3                           |
j                  |g|d|i|S |c|}t        |      }
|
|j5                  d      st        |dz         }
|
|
j6                  dk(  rt&        }
|
t&        }
 |
j                  |g|i |S t9        |dd      rC|j:                  }d|vr|j%                  dd      }t        |      }
 |
j                  |g|i |S t        |t<              rzt)        |j>                        t)        |j@                        urDtB        jE                  d|j@                  jF                   d|j>                  jF                   d        |j@                  }tI        t)        |      j6                        xs t9        |d!d      }|;t*        j                  t)        |      t&              }
|
 |
j                  |g|i |S |j                  dd      }|o|d"k7  r	d|v r|dd# }t        |      }
|
|j5                  d      st        |dz         }
|
|
j6                  dk(  rt&        }
|
t&        }
 |
j                  |g|i |S t        d$|jF                   d%dj	                  d& t*        D               d      # t        $ r t        j                  |fi |}Y )w xY w# t        $ r"  t        |      j                  |g|i |cY S w xY w)(a  
        Instantiate one of the tokenizer classes of the library from a pretrained model vocabulary.

        The tokenizer class to instantiate is selected based on the `model_type` property of the config object (either
        passed as an argument or loaded from `pretrained_model_name_or_path` if possible), or when it's missing, by
        falling back to using pattern matching on `pretrained_model_name_or_path`:

        List options

        Params:
            pretrained_model_name_or_path (`str` or `os.PathLike`):
                Can be either:

                    - A string, the *model id* of a predefined tokenizer hosted inside a model repo on huggingface.co.
                    - A path to a *directory* containing vocabulary files required by the tokenizer, for instance saved
                      using the [`~PreTrainedTokenizer.save_pretrained`] method, e.g., `./my_model_directory/`.
                    - A path or url to a single saved vocabulary file if and only if the tokenizer only requires a
                      single vocabulary file (like Bert or XLNet), e.g.: `./my_model_directory/vocab.txt`. (Not
                      applicable to all derived classes)
            inputs (additional positional arguments, *optional*):
                Will be passed along to the Tokenizer `__init__()` method.
            config ([`PreTrainedConfig`], *optional*)
                The configuration object used to determine the tokenizer class to instantiate.
            cache_dir (`str` or `os.PathLike`, *optional*):
                Path to a directory in which a downloaded pretrained model configuration should be cached if the
                standard cache should not be used.
            force_download (`bool`, *optional*, defaults to `False`):
                Whether or not to force the (re-)download the model weights and configuration files and override the
                cached versions if they exist.
            proxies (`dict[str, str]`, *optional*):
                A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
                'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
            revision (`str`, *optional*, defaults to `"main"`):
                The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
                git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any
                identifier allowed by git.
            subfolder (`str`, *optional*):
                In case the relevant files are located inside a subfolder of the model repo on huggingface.co (e.g. for
                facebook/rag-token-base), specify it here.
            tokenizer_type (`str`, *optional*):
                Tokenizer type to be loaded.
            backend (`str`, *optional*, defaults to `"tokenizers"`):
                Backend to use for tokenization. Valid options are:
                - `"tokenizers"`: Use the HuggingFace tokenizers library backend (default)
                - `"sentencepiece"`: Use the SentencePiece backend
            trust_remote_code (`bool`, *optional*, defaults to `False`):
                Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
                should only be set to `True` for repositories you trust and in which you have read the code, as it will
                execute code present on the Hub on your local machine.
            kwargs (additional keyword arguments, *optional*):
                Will be passed to the Tokenizer `__init__()` method. Can be used to set special tokens like
                `bos_token`, `eos_token`, `unk_token`, `sep_token`, `pad_token`, `cls_token`, `mask_token`,
                `additional_special_tokens`. See parameters in the `__init__()` for more details.

        Examples:

        ```python
        >>> from transformers import AutoTokenizer

        >>> # Download vocabulary from huggingface.co and cache.
        >>> tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")

        >>> # Download vocabulary from huggingface.co (user-uploaded) and cache.
        >>> tokenizer = AutoTokenizer.from_pretrained("dbmdz/bert-base-german-cased")

        >>> # If vocabulary files are in a directory (e.g. tokenizer was saved using *save_pretrained('./test/saved_model/')*)
        >>> # tokenizer = AutoTokenizer.from_pretrained("./test/bert_saved_model/")

        >>> # Download vocabulary from huggingface.co and define model-specific arguments
        >>> tokenizer = AutoTokenizer.from_pretrained("FacebookAI/roberta-base", add_prefix_space=True)

        >>> # Explicitly use the tokenizers backend
        >>> tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/llama-tokenizer", backend="tokenizers")

        >>> # Explicitly use the sentencepiece backend
        >>> tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/llama-tokenizer", backend="sentencepiece")
        ```configNT
_from_autouse_fasttokenizer_typetrust_remote_code	gguf_filezPassed `tokenizer_type` z3 does not exist. `tokenizer_type` should be one of z, c              3       K   | ]  }|  y wN .0cs     rR  	<genexpr>z0AutoTokenizer.from_pretrained.<locals>.<genexpr>X  s      Dq Ds   rf  zTokenizer class z is not currently imported.F)return_tensorsrs  auto_mapr   Fastr  r   r   z--code_revisionPythonBackendPreTrainedTokenizerFastz The encoder model config class: z3 is different from the decoder model config class: z. It is not recommended to use the `AutoTokenizer.from_pretrained()` method in this case. Please use the encoder and decoder specific tokenizer classes.
model_typer   z!Unrecognized configuration class z8 to build an AutoTokenizer.
Model type should be one of c              3   4   K   | ]  }|j                     y wr  )rg  r  s     rR  r  z0AutoTokenizer.from_pretrained.<locals>.<genexpr>  s     4[AQZZ4[s   r  )%popr  rh  
ValueErrorjoinrw  from_pretrainedr   r
   r   	for_model	Exceptionr   r  r  
isinstancerY  listreplacer   typern  rZ  r	   r   register_for_auto_classendswithrg  rl  rs  r   decoderencoderr  warning	__class__r   )clsrx  inputsr  r  _r  r  r  tokenizer_class_namers  	gguf_pathconfig_dictconfig_model_typetokenizer_configtokenizer_config_classtokenizer_auto_maphas_remote_codehas_local_code	class_refupstream_repotokenizer_class_candidate_classr  s                           rR  r  zAutoTokenizer.from_pretrained  s   d Hd+#| JJz4($4d;"JJ':DAJJ{+	 %#:#>#>~t#T #+ .~.>>qyy D,C DDEQH 
 88LMO& #34H3IId!eff2?223PdSYd]cdd#$A9WPVWI.yOPXYK))8K8F^c#331EVZ` #-- 00MXQWX!1!5!56G!N "))*:6F%5j%A"%5j%A%E%EoW[%\"
 &&2!-!R''++,=rBJJ6SUV%--fb9:(889VjY_jcijj --%5n%EF>"!%;%C%CFB%O",D8f):: 
"$. )*@AM Z,-Cf-LMUYY	 	 !!$0.q1	.q1	y  ) 5a 8 $ 9!#@.Racp! 0;IGdohnoO

?D1A3352?22-06J[_e  $/(>%78QRO&/H/Q/QRX/Y";<UX^<^"_*/G/G?/Z"3&"32?223PdSYd]cddV.5++F(637?O2?223PdSYd]cdd f23FNN#4+??6v~~7O7O6P Q%%+^^%=%=$> ?22 ^^F/V0E0EFm'RXZfhlJm
!/33DLBSTO*6667ThW]haghh "2!5!56G!N!-%)<<KaAa)?)D&78NOO&/E/N/Nv/V";<RU[<["\*/G/G?/Z"3&"32?223PdSYd]cdd/0@0@/A B++/994[IZ4[+[*\\]_
 	
u  c)99:Wb[abc>  X01GHXX14:>D s$   T. U .UU(U?>U?Nc                     |||}n||}nt        d      |||fD ]  }||t        |j                  <    |||t        |j                  <   t        j                  | ||       y)a  
        Register a new tokenizer in this mapping.

        Args:
            config_class ([`PreTrainedConfig`]):
                The configuration corresponding to the model to register.
            tokenizer_class: The tokenizer class to register (V5 - preferred parameter).
            slow_tokenizer_class: (Deprecated) The slow tokenizer to register.
            fast_tokenizer_class: (Deprecated) The fast tokenizer to register.
        Nz$You need to pass a `tokenizer_class`)exist_ok)r  r   rg  r   rn  register)config_classrs  slow_tokenizer_classfast_tokenizer_classr  	candidates         rR  r  zAutoTokenizer.register  s     "#/"6%1"6 !GHH.0DoV 	MI$CL,Y-?-?@	M  +0D0PEY#$8$A$AB""<8"Tr  )NNNF)rg  
__module____qualname____doc__r  classmethodr   rh  r   r   r  staticmethodr  r  r  rR  r  r    sZ    
 &'>?p
	1	1p
 @ p
d kpU Ur  r  rn  )NFNNNFr  )>r  rj  rN  oscollectionsr   typingr   transformers.utils.import_utilsr   configuration_utilsr   dynamic_module_utilsr   r	   modeling_gguf_pytorch_utilsr
   tokenization_utils_baser   utilsr   r   r   r   r   	utils.hubr   encoder_decoderr   auto_factoryr   configuration_autor   r   r   r   r   tokenization_utils_tokenizersr    tokenization_utils_sentencepiecer   
get_loggerrg  r  r   dictstrr  __annotations__r   rh  rn  ri  CONFIG_TO_TYPErS  r^  rw  PathLikeboolr  r  __all__)kvs   00rR  <module>r     sZ      	 #  G 3 \ ? <  % 2 *  BH			H	% 68 d3S	>2 702 c49n- 26+c3:o6C	%<%>/DIC	(?(A$tLC 
%<%>/DIC 
/F/H+dS	C
 
,C,E(4PC 
$;$=4HC 
'>'@#dKC 
*A*C&NC 	(C 
$;$=4HC 
9S9U5[_`C 	3C 	*C 
+B+D'$OC 
2I2K.QUVC  	&!C" 
0G0I,tT#C$ 	9%C& 
$;$=4H'C( 
&=&??TJ)C* 	,+C, 
$;$=4H-C. 	"/C0 
.E.G*TR1C2 	&3C4 
,C,E4P5C6 	%7C8 
$;$=4H9C: 
'>'@OdK;C< 	"=C> 
/F/H+dS?C@ 
'>'@OdKACB 
(?(A$tLCCD 
)@)B%MECF 
)@)B%MGCH 
(?(A_tLICJ 
"9";FKCL 	&MCN 	"OCP 	3QCR 	.SCT 
$;$=4HUCV 
*A*C&NWCX 
/F/H+dSYCZ 	 [C\ 
*A*CN]C^ 
1H1J-PTU_C` 
'>'@OdKaCb 
$;$=4HcCd 
%<%>/DIeCf 	 gCh 
/F/H+dSiCj 
!EXEZ"A`dekCl 	*mCn 
%<%>/DIoCp 
)@)BoMqCr 
)@)BoMsCt 
$;$=4HuCv 	"wCx 
(?(A$tLyCz 
&=&?"TJ{C| 
'>'@#dK}C~ 
'>'@#dKC@ 
,C,E(4PACB 
(?(A$tLCCD 
-D-F)DQECF 
#:#<$GGCH 
'>'@#dKICJ 
(?(A$tLKCL 
,C,E(4PMCN 
1H1J-PTUOCP 
)@)B%MQCR 
-D-F)DQSCT 
-D-F)DQUCV 
*A*C&NWCX 
,C,E(4PYCZ 
)C)E%4P[C\ 
$;$=4H]C^ 
+B+D$O_C` 
'>'@OdKaCb 
+B+D'$OcCd 	:eCf 
$;$=4HgCh 	%iCj 	(kCl 	.mCn 	.oCp 
.E.G?TRqCr 
(?(A_tLsCt 
*A*C&NuCv 	+wCx 	&yCz 
(?(A$tL{C| 
)@)B%M}C~ 
,C,E4PC@ 
1H1JoPTUACB 
)@)B%MCCD 
%<%>/DIECF 
.E.G*TRGCH 
,C,E(4PICJ 
0G0I,tTKCL 
(?(A_tLMCN 
0G0I,tTOCP 
0G0I,tTQCR 
.E.G*TRSCT 
"9";FUCV 
0G0I,tTWCX 
'>'@#dKYCZ 
-D-F)DQ[C\ 
$;$==4H]C^ 	"_C` 
(?(A$tLaCb 
)C)E%4PcCd 
(?(A$tLeCf 
)@)B%MgCh 
(B(D$$OiCj 
,C,E(4PkCl 
&=&?"TJmCn 
*A*C&NoCp 	%qCr 
-D-F/DQsCt 
0G0I,tTuCv 	'wCz *, #)@)B%		
yCF *, #)@)B%		
ECR *, #)@)B%		
QC^ *, #)@)B%		
]Ch 
&@&B"MiCj 
1H1JoPTUkCl 
0G0I,tTmCn 
&=&?"TJoCp 
&=&?"TJqCr 	$sCt 
!8!:EuCv 
&=&?]TJwCx 
-D-FMDQyCz 
"9";F{C| 	"}C~ 
%<%>/DIC@ 
$;$=4HACB 
(?(A_tLCCD 
(?(A$tLECF 
/F/H+dSGCH 
'>'@#dKICJ 
(?(A$tLKCL 
%<%>/DIMCN 
(?(A$tLOCP 
+B+D$OQCR 
)@)BoMSCT 
/F/H+dSUCV 
#:#<$GWCX 
&=&?"TJYCZ 
%<%>/DI[C\ 
&=&??TJ]C^ 
*A*C&N_C` 
,C,E(4PaCb 	,cCd 
#:#<$GeCf 	(gCh 
(?(A}tLiCl *, #)@)B%		
kCv 
(?(A$tLwCx 	.yCz 
'>'@OdK{C| 
&=&?"TJ}C~ 
-D-F)DQC@ 
+B+D'$OACB 
,C,E(4PCCD 
*A*C&NECF 
)@)B%MGCH 
&=&?"TJICJ 
*A*C&NKCL 
+B+D'$OMCN 
/F/H+dSOCP 
)@)B%MQCR 
-D-F)DQSCT 	 UCV 
%<%>/DIWCX 
0G0I,tTYCZ 
,C,E(4P[C\ 
*A*C&N]C^ 
)@)BoM_C` 	(aCb 	5cCd 	)eCf 
,C,E(4PgCh 
'>'@#dKiCj 
$;$=4HkCl 
*A*CNmCn 
3J3L/RVWoCp 
6M6O2UYZqCr 
-D-F)DQsCt 
(B(D$$OuCv 
*A*C&NwCx 
5O5Q1W[\yCz 
,F,H(dS{C| 	*}C~ 
+B+D$OC@ 
+B+D'$OACB 
*A*CNCCD 
1H1JPTUECF 
 7 9}tDGCH 
(?(A$tLICJ 	$KCL 
+B+D'$OMCN 
#:#<$GOCP 
$;$=4HQCR 
"9";FSCT 	.UCV 	2WCX 
$;$=4HYCZ 
+B+D$O[C\ 	"]C` *, #)@)B%		
_Cj 	-kCl 	2mCn 	7oCp 	<qCr 
*A*C&NsCt 
%<%>/DIuCv 
$;$=4HwCx 	 yCz 
1H1J-PTU{C| 
4K4M0SWX}C~ 
&=&?"TJC@ 
(?(A$tLACB 
*A*C&NCCD 
&=&?"TJECE N %%9;RS #=#7#=#=#?@41a!Q$@!)# )$s)d2B )\ 04 %)#"]#&S)9#9]R[[%%,] ] #s(^d"	]
 #:] Dj] ] ] 
#s(^]@`U `UF	 
0G As   &n