
from typing import TYPE_CHECKING, Any, Dict, List, Union

from ...utils import (
    get_module_from_name,
    is_accelerate_available,
    is_nvidia_modelopt_available,
    is_torch_available,
    logging,
)
from ..base import DiffusersQuantizer


if TYPE_CHECKING:
    from ...models.modeling_utils import ModelMixin

if is_torch_available():
    import torch
    import torch.nn as nn

if is_accelerate_available():
    from accelerate.utils import set_module_tensor_to_device

logger = logging.get_logger(__name__)


class NVIDIAModelOptQuantizer(DiffusersQuantizer):
    r"""
    Diffusers Quantizer for Nvidia-Model Optimizer
    """

    use_keep_in_fp32_modules = True
    requires_calibration = False
    required_packages = ["nvidia_modelopt"]

    def __init__(self, quantization_config, **kwargs):
        super().__init__(quantization_config, **kwargs)

    def validate_environment(self, *args, **kwargs):
        if not is_nvidia_modelopt_available():
            raise ImportError(
                "Loading an nvidia-modelopt quantized model requires nvidia-modelopt library (`pip install nvidia-modelopt`)"
            )

        self.offload = False

        device_map = kwargs.get("device_map", None)
        if isinstance(device_map, dict):
            if "cpu" in device_map.values() or "disk" in device_map.values():
                if self.pre_quantized:
                    raise ValueError(
                        "You are attempting to perform cpu/disk offload with a pre-quantized modelopt model "
                        "This is not supported yet. Please remove the CPU or disk device from the `device_map` argument."
                    )
                else:
                    self.offload = True

    def check_if_quantized_param(
        self,
        model: "ModelMixin",
        param_value: "torch.Tensor",
        param_name: str,
        state_dict: Dict[str, Any],
        **kwargs,
    ):
        from modelopt.torch.quantization.utils import is_quantized

        module, tensor_name = get_module_from_name(model, param_name)
        if self.pre_quantized:
            return True
        elif is_quantized(module) and "weight" in tensor_name:
            return True
        return False

    def create_quantized_param(
        self,
        model: "ModelMixin",
        param_value: "torch.Tensor",
        param_name: str,
        target_device: "torch.device",
        *args,
        **kwargs,
    ):
        """
        Create the quantized parameter by calling .calibrate() after setting it to the module.
        """
        import modelopt.torch.quantization as mtq

        dtype = kwargs.get("dtype", torch.float32)
        module, tensor_name = get_module_from_name(model, param_name)
        if self.pre_quantized:
            # Pre-quantized checkpoints already carry quantized weights; just place them.
            module._parameters[tensor_name] = torch.nn.Parameter(param_value.to(target_device))
        else:
            set_module_tensor_to_device(model, param_name, target_device, param_value, dtype)
            mtq.calibrate(
                module, self.quantization_config.modelopt_config["algorithm"], self.quantization_config.forward_loop
            )
            mtq.compress(module)
            module.weight.requires_grad = False

    def adjust_max_memory(self, max_memory: Dict[str, Union[int, str]]) -> Dict[str, Union[int, str]]:
        # Keep a 10% headroom so quantization buffers fit alongside the weights.
        max_memory = {key: val * 0.90 for key, val in max_memory.items()}
        return max_memory

    def adjust_target_dtype(self, target_dtype: "torch.dtype") -> "torch.dtype":
        if self.quantization_config.quant_type == "FP8":
            target_dtype = torch.float8_e4m3fn
        return target_dtype

    def update_torch_dtype(self, torch_dtype: "torch.dtype") -> "torch.dtype":
        if torch_dtype is None:
            logger.info("You did not specify `torch_dtype` in `from_pretrained`. Setting it to `torch.float32`.")
            torch_dtype = torch.float32
        return torch_dtype

    def get_conv_param_names(self, model: "ModelMixin") -> List[str]:
        """
        Get parameter names for all convolutional layers in a HuggingFace ModelMixin. Includes Conv1d/2d/3d and
        ConvTranspose1d/2d/3d.
        """
        conv_types = (
            nn.Conv1d,
            nn.Conv2d,
            nn.Conv3d,
            nn.ConvTranspose1d,
            nn.ConvTranspose2d,
            nn.ConvTranspose3d,
        )

        conv_param_names = []
        for name, module in model.named_modules():
            if isinstance(module, conv_types):
                for param_name, _ in module.named_parameters(recurse=False):
                    conv_param_names.append(f"{name}.{param_name}")
        return conv_param_names

    def _process_model_before_weight_loading(
        self,
        model: "ModelMixin",
        keep_in_fp32_modules: List[str] = [],
        **kwargs,
    ):
        import modelopt.torch.opt as mto

        if self.pre_quantized:
            return

        modules_to_not_convert = self.quantization_config.modules_to_not_convert

        if modules_to_not_convert is None:
            modules_to_not_convert = []
        if isinstance(modules_to_not_convert, str):
            modules_to_not_convert = [modules_to_not_convert]
        modules_to_not_convert.extend(keep_in_fp32_modules)
        if self.quantization_config.disable_conv_quantization:
            modules_to_not_convert.extend(self.get_conv_param_names(model))

        # Disable quantization for every excluded module via a wildcard pattern in the quant_cfg.
        for module in modules_to_not_convert:
            self.quantization_config.modelopt_config["quant_cfg"]["*" + module + "*"] = {"enable": False}

        self.quantization_config.modules_to_not_convert = modules_to_not_convert
        mto.apply_mode(model, mode=[("quantize", self.quantization_config.modelopt_config)])
        model.config.quantization_config = self.quantization_config

    def _process_model_after_weight_loading(self, model, **kwargs):
        from modelopt.torch.opt import ModeloptStateManager

        if self.pre_quantized:
            return model
        # Drop duplicated modelopt state from submodules; only the root model keeps it.
        for _, m in model.named_modules():
            if hasattr(m, ModeloptStateManager._state_key) and m is not model:
                ModeloptStateManager.remove_state(m)
        return model

    @property
    def is_trainable(self):
        return True

    @property
    def is_serializable(self):
        self.quantization_config.check_model_patching(operation="saving")
        return True
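

# Illustrative usage sketch (comment only, not part of this module). The quantizer above
# is not instantiated directly; `ModelMixin.from_pretrained` selects it when it receives a
# ModelOpt quantization config. The checkpoint id below is a hypothetical placeholder, and
# `NVIDIAModelOptConfig` with `quant_type="FP8"` is assumed to come from the companion
# quantization_config module (FP8 is the quant type `adjust_target_dtype` handles above).
#
#     import torch
#     from diffusers import NVIDIAModelOptConfig, SanaTransformer2DModel
#
#     quant_config = NVIDIAModelOptConfig(quant_type="FP8")
#     transformer = SanaTransformer2DModel.from_pretrained(
#         "org/sana-checkpoint",  # hypothetical model id
#         subfolder="transformer",
#         quantization_config=quant_config,
#         torch_dtype=torch.bfloat16,
#     )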