
    i7                     |   d dl Z d dlZd dlmZ d dlmZmZmZmZ d dl	Z	d dl
mZ ddlmZ ddlmZmZmZ  ej$                  e      ZdZd	Zd
Zeez   ZdZdZe G d d             Z G d d      Z G d de      Zdedeee   ee   f   fdZde	j                  j@                  defdZ!dej@                  dede"fdZ#y)    N)	dataclass)DictListOptionalTuple   )logging   )HookRegistry	ModelHookStateManagertaylorseer_cache)z^blocks.*attnz^transformer_blocks.*attnz ^single_transformer_blocks.*attn)z"^temporal_transformer_blocks.*attn)z^[^.]*block[^.]*\.[^.]+$)z
^proj_out$c                       e Zd ZU dZdZeed<   dZeed<   dZe	e   ed<   dZ
eed	<   ej                  Ze	ej                     ed
<   dZe	ee      ed<   dZe	ee      ed<   dZeed<   defdZy)TaylorSeerCacheConfiga  
    Configuration for TaylorSeer cache. See: https://huggingface.co/papers/2503.06923

    Attributes:
        cache_interval (`int`, defaults to `5`):
            The interval between full computation steps. After a full computation, the cached (predicted) outputs are
            reused for this many subsequent denoising steps before refreshing with a new full forward pass.

        disable_cache_before_step (`int`, defaults to `3`):
            The denoising step index before which caching is disabled, meaning full computation is performed for the
            initial steps (0 to disable_cache_before_step - 1) to gather data for Taylor series approximations. During
            these steps, Taylor factors are updated, but caching/predictions are not applied. Caching begins at this
            step.

        disable_cache_after_step (`int`, *optional*, defaults to `None`):
            The denoising step index after which caching is disabled. If set, for steps >= this value, all modules run
            full computations without predictions or state updates, ensuring accuracy in later stages if needed.

        max_order (`int`, defaults to `1`):
            The highest order in the Taylor series expansion for approximating module outputs. Higher orders provide
            better approximations but increase computation and memory usage.

        taylor_factors_dtype (`torch.dtype`, defaults to `torch.bfloat16`):
            Data type used for storing and computing Taylor series factors. Lower precision reduces memory but may
            affect stability; higher precision improves accuracy at the cost of more memory.

        skip_predict_identifiers (`List[str]`, *optional*, defaults to `None`):
            Regex patterns (using `re.fullmatch`) for module names to place as "skip" in "cache" mode. In this mode,
            the module computes fully during initial or refresh steps but returns a zero tensor (matching recorded
            shape) during prediction steps to skip computation cheaply.

        cache_identifiers (`List[str]`, *optional*, defaults to `None`):
            Regex patterns (using `re.fullmatch`) for module names to place in Taylor-series caching mode, where
            outputs are approximated and cached for reuse.

        use_lite_mode (`bool`, *optional*, defaults to `False`):
            Enables a lightweight TaylorSeer variant that minimizes memory usage by applying predefined patterns for
            skipping and caching (e.g., skipping blocks and caching projections). This overrides any custom
            `inactive_identifiers` or `active_identifiers`.

    Notes:
        - Patterns are matched using `re.fullmatch` on the module name.
        - If `skip_predict_identifiers` or `cache_identifiers` are provided, only matching modules are hooked.
        - If neither is provided, all attention-like modules are hooked by default.

    Example of inactive and active usage:

    ```py
    def forward(x):
        x = self.module1(x)  # inactive module: returns zeros tensor based on shape recorded during full compute
        x = self.module2(x)  # active module: caches output here, avoiding recomputation of prior steps
        return x
    ```
       cache_interval   disable_cache_before_stepNdisable_cache_after_stepr
   	max_ordertaylor_factors_dtypeskip_predict_identifierscache_identifiersFuse_lite_modereturnc                     d| j                    d| j                   d| j                   d| j                   d| j                   d| j
                   d| j                   d| j                   d	S )
Nz%TaylorSeerCacheConfig(cache_interval=z, disable_cache_before_step=z, disable_cache_after_step=z, max_order=z, taylor_factors_dtype=z, skip_predict_identifiers=z, cache_identifiers=z, use_lite_mode=))r   r   r   r   r   r   r   r   selfs    j/home/obispo/Crisostomo_bridge/mision_env/lib/python3.12/site-packages/diffusers/hooks/taylorseer_cache.py__repr__zTaylorSeerCacheConfig.__repr__\   s    "112 3))-)G)G(H I((,(E(E'F G( )$$($=$=#> ?((,(E(E'F G!!%!7!7 8 9!//03
	
    )__name__
__module____qualname____doc__r   int__annotations__r   r   r   r   torchbfloat16r   dtyper   r   strr   r   boolr!    r"   r    r   r      s    5n NC%&s&.2hsm2Is27..(5;;/@48htCy18-1xS	*1M4
# 
r"   r   c                       e Zd Zej                  ddfdeej                     dedefdZ	dd	Z
d
eej                  df   ddfdZej                  j                  deej                     fd       Zy)TaylorSeerStater
   Fr   r   is_inactivec                     || _         || _        || _        d| _        d | _        i | _        d | _        d | _        d| _        y )Nr.   )	r   r   r1   module_dtypeslast_update_steptaylor_factorsinactive_shapesdevicecurrent_step)r   r   r   r1   s       r    __init__zTaylorSeerState.__init__k   sJ     %9!"&68/3BDFJ.2!#r"   r   Nc                 J    d| _         d | _        i | _        d | _        d | _        y )Nr3   )r9   r5   r6   r7   r8   r   s    r    resetzTaylorSeerState.reset|   s)     $ #r"   outputs.c           	         t        d |D              | _        |d   j                  | _        | j                  rt        d |D              | _        nt        |      D ]  \  }}d|i}| j                  d u }|s| j                  | j                  z
  }|dk(  rt        d      | j                  j                  |i       }t        | j                        D ]A  }|j                  |      }	|	 n,||   |	j                  |j                        z
  |z  ||dz   <   C |j                         D 
ci c]!  \  }
}|
|j                  | j                         # c}}
| j                  |<    | j                  | _        y c c}}
w )Nc              3   4   K   | ]  }|j                     y wNr+   .0outputs     r    	<genexpr>z)TaylorSeerState.update.<locals>.<genexpr>   s     "FF6<<"F   r   c              3   4   K   | ]  }|j                     y wr@   )shaperB   s     r    rE   z)TaylorSeerState.update.<locals>.<genexpr>   s     (L&(LrF   z0Delta step cannot be zero for TaylorSeer update.r
   )tupler4   r8   r1   r7   	enumerater5   r9   
ValueErrorr6   getranger   tor+   itemsr   )r   r=   ifeaturesnew_factorsis_first_update
delta_stepprev_factorsjprevorderfactors               r    updatezTaylorSeerState.update   sq    #"Fg"FFaj''#((LG(L#LD (1 8898}"&"7"74"?&!%!2!2T5J5J!JJ!Q()[\\ $(#6#6#:#:1b#AL"4>>2 e+//2<!.9!ntwwx~~?V.VZd-dAE*	e VaUfUfUh*DQE6E699T%>%>??*##A&& !% 1 1	*s   (&E4c                 X   | j                   t        d      | j                  | j                   z
  }g }| j                  r| j                  t        d      t        t        | j                              D ]M  }|j                  t        j                  | j                  |   | j                  |   | j                               O |S | j                  st        d      t        | j                        }t        | j                  d         }t        |      D ]  }| j                  |   }| j                  |   }t        j                  |d   |      }t        |      D ]9  }	||	z  t        j                  |	      z  }
||	   }||j!                  |      |
z  z   }; |j                  |        |S )Nz3Cannot predict without prior initialization/update.z*Inactive shapes not set during prediction.)r+   r8   z'Taylor factors empty during prediction.r   rA   )r5   rK   r9   r1   r7   rM   lenr4   appendr)   zerosr8   r6   
zeros_likemath	factorialrN   )r   step_offsetr=   rP   num_outputs
num_ordersoutput_dtyper6   rD   rX   coeffrY   s               r    predictzTaylorSeerState.predict   s     (RSS''$*?*??##+ !MNN3t1123 KK,,Q/"003#{{,  && !JKKd112KT0034J;' '#11!4!%!4!4Q!7)).*;<P":. FE(%/4>>%3HHE+E2F#fii&=&EEFF v&' r"   )r   N)r#   r$   r%   r)   r*   r   r+   r'   r-   r:   r<   r   TensorrZ   compilerdisabler   rg   r.   r"   r    r0   r0   j   s     7<nn!	$&u{{3$ $ 	$"2u||S()2 
2> ^^ ell+    r"   r0   c                   4    e Zd ZdZ	 ddededej                  dedee   f
 fdZ	d	ej                  j                  fd
Zd	ej                  j                  ddfdZej                  j                  defd       Zd	ej                  j                  fdZ xZS )TaylorSeerCacheHookTNr   r   r   state_managerr   c                 h    t         |           || _        || _        || _        || _        || _        y r@   )superr:   r   r   r   r   rm   )r   r   r   r   rm   r   	__class__s         r    r:   zTaylorSeerCacheHook.__init__   s8     	,)B&(@%$8!*r"   modulec                     |S r@   r.   r   rq   s     r    initialize_hookz#TaylorSeerCacheHook.initialize_hook   s    r"   r   c                 8    | j                   j                          y)z4
        Reset state between sampling runs.
        N)rm   r<   rs   s     r    reset_statezTaylorSeerCacheHook.reset_state   s     	  "r"   c                 4   | j                   j                         }|xj                  dz  c_        |j                  }|| j                  k  }|| j                  z
  dz
  | j                  z  dk(  }| j
                  d uxr || j
                  k\  }|xs |xs |}||fS Nr
   r   )rm   	get_stater9   r   r   r   )r   stater9   is_warmup_phaseis_compute_intervalis_cooldown_phaseshould_computes          r    _measure_should_computez+TaylorSeerCacheHook._measure_should_compute   s    !%!3!3!=!=!?a))&)G)GG+d.L.LLqPTXTgTggkll 99Ew,Z^ZwZwJw(T,?TCTu$$r"   c                 $   | j                         \  }}|rN | j                  j                  |i |}t        |t        j
                        r|fn|}|j                  |       |S |j                         }t        |      dk(  r|d   S t        |      S rx   )
r   fn_reforiginal_forward
isinstancer)   rh   rZ   rg   r\   rI   )	r   rq   argskwargsr~   rz   r=   wrapped_outputsoutputs_lists	            r    new_forwardzTaylorSeerCacheHook.new_forward   s     $ < < >2dkk22DCFCG,6w,MwjSZOLL)N}}"%l"3q"8|AQeL>QQr"   r@   )r#   r$   r%   _is_statefulr'   r)   r+   r   r   r:   nnModulert   rv   ri   rj   r-   r   r   __classcell__)rp   s   @r    rl   rl      s    L 37++ $'+ $kk	+
 $+ #+3-+ehhoo #%((// #d # ^^% % %	R%((// 	Rr"   rl   configr   c                     | j                   | j                   nd}| j                  | j                  nd}|xs g |xs g fS )zV
    Resolve effective inactive and active pattern lists from config + templates.
    N)r   r   )r   inactive_patternsactive_patternss      r    _resolve_patternsr      sM    
 <B;Z;Z;f77lp282J2J2Vf..\`O"O$9r99r"   rq   c                    t        |      \  }}|xs t        }|j                  rNt        j	                  d       t
        }t        }|j                  s|j                  rt        j                  d       | j                         D ]@  \  }t        fd|D              }t        fd|D              }|s|s3t        |||       B y)a  
    Applies the TaylorSeer cache to a given pipeline (typically the transformer / UNet).

    This function hooks selected modules in the model to enable caching or skipping based on the provided
    configuration, reducing redundant computations in diffusion denoising loops.

    Args:
        module (torch.nn.Module): The model subtree to apply the hooks to.
        config (TaylorSeerCacheConfig): Configuration for the cache.

    Example:
    ```python
    >>> import torch
    >>> from diffusers import FluxPipeline, TaylorSeerCacheConfig

    >>> pipe = FluxPipeline.from_pretrained(
    ...     "black-forest-labs/FLUX.1-dev",
    ...     torch_dtype=torch.bfloat16,
    ... )
    >>> pipe.to("cuda")

    >>> config = TaylorSeerCacheConfig(
    ...     cache_interval=5,
    ...     max_order=1,
    ...     disable_cache_before_step=3,
    ...     taylor_factors_dtype=torch.float32,
    ... )
    >>> pipe.transformer.enable_cache(config)
    ```
    z(Using TaylorSeer Lite variant for cache.z"Lite mode overrides user patterns.c              3   J   K   | ]  }t        j                  |        y wr@   re	fullmatchrC   patternnames     r    rE   z)apply_taylorseer_cache.<locals>.<genexpr>.  s     \wr||GT:\    #c              3   J   K   | ]  }t        j                  |        y wr@   r   r   s     r    rE   z)apply_taylorseer_cache.<locals>.<genexpr>/  s     XWR\\'48Xr   )rq   r   r1   N)r   _TRANSFORMER_BLOCK_IDENTIFIERSr   loggerinfo_PROJ_OUT_IDENTIFIERS_BLOCK_IDENTIFIERSr   r   warningnamed_modulesany_apply_taylorseer_cache_hook)rq   r   r   r   	submodulematches_inactivematches_activer   s          @r    apply_taylorseer_cacher     s    > *;6)B&%G)GO>?/.**f.F.FNN?@!//1 	
i\J[\\XXX N$(	
	
r"   r1   c                    t        t        |j                  |j                  |d      }t	        j
                  |       }t        |j                  |j                  |j                  |j                  |      }|j                  |t               y)a  
    Registers the TaylorSeer hook on the specified nn.Module.

    Args:
        name: Name of the module.
        module: The nn.Module to be hooked.
        config: Cache configuration.
        is_inactive: Whether this module should operate in "inactive" mode.
    )r   r   r1   )init_kwargs)r   r   r   r   rm   N)r   r0   r   r   r   check_if_exists_or_initializerl   r   r   r   register_hook_TAYLORSEER_CACHE_HOOK)rq   r   r1   rm   registryhooks         r    r   r   9  s     !$*$?$?))&
M 99&AH,,"("B"B#88!'!@!@#D 4!78r"   )$r`   r   dataclassesr   typingr   r   r   r   r)   torch.nnr   utilsr	   hooksr   r   r   
get_loggerr#   r   r   $_SPATIAL_ATTENTION_BLOCK_IDENTIFIERS%_TEMPORAL_ATTENTION_BLOCK_IDENTIFIERSr   r   r   r   r0   rl   r,   r   r   r   r-   r   r.   r"   r    <module>r      s    	 ! . .    8 8 
		H	%+ ( $
 )P %!EHm!m 3 '  L
 L
 L
^Y Yx/R) /Rd:3 :d3ic>R8S :3
588?? 3
<Q 3
l!9II!9!!9 !9r"   