
    i5                        d dl Z ddlmZ ddlmZ ddlmZmZmZm	Z	m
Z
mZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZ ddlmZ ddlmZ dd	lm Z  dd
l!m"Z" ddl#m$Z$ ddl%m&Z& ddl'm(Z( ddl)m*Z* ddl+m,Z, ddl-m.Z. ddl/m0Z0 ddl1m2Z2 ddl3m4Z4 ddl5m6Z6 ddl7m8Z8 ddl9m:Z: ddl;m<Z< ddl=m>Z> ddl?m@Z@ ddlAmBZB ddlCmDZD i de"de&de(de4d ed!e<d"e>d#e2d$e,d%e6d&e8d'e*d(e.d)eBd*e$d+eDd,e@e0e e:d-ZEi dede
de
d$eded ed!ed"ed#ed&ed'ed(ed%ed)ed*e	d+ed,eeeed-ZF ej                  eH      ZI G d. d/      ZJ G d0 d1      ZKd2eLfd3ZMd4eLfd5ZNd6 ZOy)7    N   )
AutoConfig)logging)
AqlmConfigAutoRoundConfig	AwqConfigBitNetQuantConfigBitsAndBytesConfigCompressedTensorsConfig
EetqConfigFbgemmFp8ConfigFineGrainedFP8ConfigFPQuantConfig
GPTQConfigHiggsConfig	HqqConfigMxfp4ConfigQuantizationConfigMixinQuantizationMethodQuantoConfigQuarkConfig
SpQRConfigTorchAoConfig
VptqConfig   )HfQuantizer)AqlmHfQuantizer)AutoRoundQuantizer)AwqQuantizer)BitNetHfQuantizer)Bnb4BitHfQuantizer)Bnb8BitHfQuantizer)CompressedTensorsHfQuantizer)EetqHfQuantizer)FbgemmFp8HfQuantizer)FineGrainedFP8HfQuantizer)FPQuantHfQuantizer)GptqHfQuantizer)HiggsHfQuantizer)HqqHfQuantizer)Mxfp4HfQuantizer)QuantoHfQuantizer)QuarkHfQuantizer)SpQRHfQuantizer)TorchAoHfQuantizer)VptqHfQuantizerawqbitsandbytes_4bitbitsandbytes_8bitgptqaqlmquantoquarkfp_quanteetqhiggshqqzcompressed-tensors
fbgemm_fp8torchaobitnetvptqspqr)fp8z
auto-roundmxfp4c                   6    e Zd ZdZedefd       Zed        Zy)AutoQuantizationConfigz
    The Auto-HF quantization config class that takes care of automatically dispatching to the correct
    quantization config given a quantization config stored in a dictionary.
    quantization_config_dictc           	      v   |j                  d      }|j                  dd      s|j                  dd      r*|j                  dd      rdnd}t        j                  |z   }n|t        d      |t        vr,t        d| d	t        t        j                                      t        |   }|j                  |      S )
Nquant_methodload_in_8bitFload_in_4bit_4bit_8bitThe model's quantization config from the arguments has no `quant_method` attribute. Make sure that the model has been correctly quantizedUnknown quantization type, got  - supported types are: )	getr   BITS_AND_BYTES
ValueError AUTO_QUANTIZATION_CONFIG_MAPPINGlistAUTO_QUANTIZER_MAPPINGkeys	from_dict)clsrE   rG   suffix
target_clss        f/home/obispo/Crisostomo_bridge/mision_env/lib/python3.12/site-packages/transformers/quantizers/auto.pyrV   z AutoQuantizationConfig.from_dictx   s    /33NC#''>BZB^B^_motBu 8 < <^U SWY`F-<<vEL! \  ??1, @/44678: 
 6lC
##$<==    c                     t        j                  |fi |}t        |dd       t        d| d      |j                  }| j                  |      } |j                  di | |S )Nquantization_configz)Did not found a `quantization_config` in z2. Make sure that the model is correctly quantized. )r   from_pretrainedgetattrrQ   r]   rV   update)rW   pretrained_model_name_or_pathkwargsmodel_configrE   r]   s         rZ   r_   z&AutoQuantizationConfig.from_pretrained   s    !112OZSYZ<!6=E;<Y;Z  [M  N  $0#C#C !mm,DE""",V,""r[   N)__name__
__module____qualname____doc__classmethoddictrV   r_   r^   r[   rZ   rD   rD   r   s6    
 > > >( 
# 
#r[   rD   c                   r    e Zd ZdZedeez  fd       Zed        Zedeez  dedz  fd       Z	e
d        Zy)	AutoHfQuantizerz
     The Auto-HF quantizer class that takes care of automatically instantiating to the correct
    `HfQuantizer` given the `QuantizationConfig`.
    r]   c           	      D   t        |t              rt        j                  |      }|j                  }|t
        j                  k(  r|j                  r|dz  }n|dz  }|t        vr,t        d| dt        t        j                                      t        |   } ||fi |S )NrK   rJ   rM   rN   )
isinstancerj   rD   rV   rG   r   rP   rH   rT   rQ   rS   rU   )rW   r]   rc   rG   rY   s        rZ   from_configzAutoHfQuantizer.from_config   s     )40"8"B"BCV"W*77 -<<<"//''551, @/44678: 
 ,L9
-888r[   c                 P    t        j                  |fi |}| j                  |      S )N)rD   r_   ro   )rW   rb   rc   r]   s       rZ   r_   zAutoHfQuantizer.from_pretrained   s*    4DDEbmflm233r[   quantization_config_from_argsNc           
         |d}nd}t        |t              r;t        |t              rt        j                  |      }nt        j                  |      }|g|j
                  j                  |j
                  j                  k7  r:t        d|j
                  j                   d|j
                  j                   d      t        |t        t        t        t        t        t        t        f      rW|U|j                         }|j                         D ]  \  }}t!        |||        |dt#        |j%                                dz  }|dk7  r-t        |t        t        f      st'        j(                  |       |S t*        j-                  |       |S )z
        handles situations where both quantization_config from args and quantization_config from model config are present.
        zYou passed `quantization_config` or equivalent parameters to `from_pretrained` but the model you're loading already has a `quantization_config` attribute. The `quantization_config` from the model will be used. zThe model is quantized with z but you are passing a z| config. Please make sure to pass the same quantization config class to `from_pretrained` with different loading attributes.z"However, loading attributes (e.g. z]) will be overwritten with the one you passed to `from_pretrained`. The rest will be ignored.)rn   rj   r   rV   rD   	__class__re   rQ   r   r   r   r   r   r   get_loading_attributesitemssetattrrS   rU   warningswarnloggerinfo)rW   r]   rq   warning_msgloading_attr_dictattrvals          rZ   merge_quantization_configsz*AutoHfQuantizer.merge_quantization_configs   s    )4y 
 K)407I&5&?&?@S&T#&<&F&FGZ&[# *5#--66:W:a:a:j:jj./B/L/L/U/U.VVm  oL  oV  oV  o_  o_  n` `F F  ###+( .9 = T T V.446 8	c+T378 ?EVE[E[E]@^?_  `}  ~  ~K"Z0CkSgEh%iMM+& #" KK$""r[   c           	      ^   | j                  dd       }| j                  dd      s| j                  dd      r*| j                  dd      rdnd}t        j                  |z   }n|t        d      |t        vr8t
        j                  d| d	t        t        j                                d
       yy)NrG   rH   FrI   rJ   rK   rL   rM   rN   z~. Hence, we will skip the quantization. To remove the warning, you can delete the quantization_config attribute in config.jsonT)
rO   r   rP   rQ   rR   rz   warningrS   rT   rU   )rE   rG   rX   s      rZ   supports_quant_methodz%AutoHfQuantizer.supports_quant_method   s    /33NDI#''>BZB^B^_motBu 8 < <^U SWY`F-<<vEL! \  ??NN1, @/44678 9ii
 r[   )re   rf   rg   rh   ri   r   rj   ro   r_   r   staticmethodr   r^   r[   rZ   rl   rl      s    
 9.E.L 9 90 4 4 :#!$;;:# (?'E:# :#x  r[   rl   methodc                       fd}|S )z-Register a custom quantization configuration.c                 ~    t         v rt        d d      t        | t              st	        d      | t         <   | S )NzConfig '' already registeredz*Config must extend QuantizationConfigMixin)rR   rQ   
issubclassr   	TypeError)rW   r   s    rZ   register_config_fnz8register_quantization_config.<locals>.register_config_fn  sH    55xx/CDEE#67HII36(0
r[   r^   )r   r   s   ` rZ   register_quantization_configr     s     r[   namec                       fd}|S )zRegister a custom quantizer.c                 ~    t         v rt        d d      t        | t              st	        d      | t         <   | S )NzQuantizer 'r   z!Quantizer must extend HfQuantizer)rT   rQ   r   r   r   )rW   r   s    rZ   register_quantizer_fnz1register_quantizer.<locals>.register_quantizer_fn$  sG    )){4&0DEFF#{+?@@'*t$
r[   r^   )r   r   s   ` rZ   register_quantizerr   !  s     ! r[   c                 "   t        | d      }|r!t        j                  | j                        sd}|s|Q|r&t        j	                  | j                  |      | _        n|| _        t        j                  | j                  |      }nd }||j                  ||       |j                  |      }|j                  |       } |j                  |       } t        |j                  dd      s&|j                  j                  }t        |d|      |d<   || |fS )Nr]   F)pre_quantized)
device_mapweights_only
dequantizevaluequant)hasattrrl   r   r]   r   ro   validate_environmentupdate_device_mapupdate_tp_planupdate_ep_planr`   rG   )configr]   r   r   
user_agentr   hf_quantizerrG   s           rZ   get_hf_quantizerr   1  s   F$9:M_BB6C]C]^+7)8)S)S**,?*F& *=F&&22&&' 3 

 ))!% 	* 	
 "33J?
,,V4,,V4 |77uM';;HHL"),"NJw++r[   )Prx   models.auto.configuration_autor   utilsr   utils.quantization_configr   r   r   r	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   baser   quantizer_aqlmr   quantizer_auto_roundr   quantizer_awqr   quantizer_bitnetr    quantizer_bnb_4bitr!   quantizer_bnb_8bitr"   quantizer_compressed_tensorsr#   quantizer_eetqr$   quantizer_fbgemm_fp8r%   quantizer_finegrained_fp8r&   quantizer_fp_quantr'   quantizer_gptqr(   quantizer_higgsr)   quantizer_hqqr*   quantizer_mxfp4r+   quantizer_quantor,   quantizer_quarkr-   quantizer_spqrr.   quantizer_torchaor/   quantizer_vptqr0   rT   rR   
get_loggerre   rz   rD   rl   strr   r   r   r^   r[   rZ   <module>r      s    7      .  + 4 ' / 2 2 F + 6 @ 2 + - ) - / - + 1 +	<+ + O	
 O   " O  
> 6 & !   O!" O#$ %$) .$	9$+$ +$ J	$
 J$ J$ l$ [$ $ 
9$ 1$ /$ [$ }$ $  J!$" J#$$  !)$  . 
		H	%&# &#Rs sl  !S ! !,r[   