
    i&                     4   d dl mZ d dlmZmZmZmZmZ d dlZd dlm	Z	 ddl
mZmZ ddlmZ ddlmZmZmZmZmZ d	d
lmZ d	dlmZmZ d	dlmZ d	dlmZ d	dlmZm Z  d	dl!m"Z" ddl#m$Z$  ejJ                  e&      Z'e G d de             Z( G d deeee      Z)y)    )	dataclass)AnyDictOptionalTupleUnionN)nn   )ConfigMixinregister_to_config)PeftAdapterMixin)USE_PEFT_BACKEND
BaseOutputloggingscale_lora_layersunscale_lora_layers   )AttentionMixin)
PatchEmbedPixArtAlphaTextProjection)Transformer2DModelOutput)
ModelMixin)AdaLayerNormSingleRMSNorm)SanaTransformerBlock   )zero_modulec                   2    e Zd ZU eej
                     ed<   y)SanaControlNetOutputcontrolnet_block_samplesN)__name__
__module____qualname__r   torchTensor__annotations__     v/home/obispo/Crisostomo_bridge/mision_env/lib/python3.12/site-packages/diffusers/models/controlnets/controlnet_sana.pyr   r   $   s    #ELL11r(   r   c            &           e Zd ZdZddgZddgZe	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d%dedee   d	ed
ededee   dee   dee   dede	de	de
dedede
de	dee   ddf$ fd       Z	 	 	 	 	 d&dej                  dej                  dej                  dej                  de	deej                     d eej                     d!eeeef      d"e
deeej                  d#f   ef   fd$Z xZS )'SanaControlNetModelTr   r   patch_embednormNin_channelsout_channelsnum_attention_headsattention_head_dim
num_layersnum_cross_attention_headscross_attention_head_dimcross_attention_dimcaption_channels	mlp_ratiodropoutattention_biassample_size
patch_sizenorm_elementwise_affinenorm_epsinterpolation_scalereturnc                    t         |           |xs |}||z  }t        |||||||dnd       | _        t	        |      | _        t        |	|      | _        t        |dd      | _	        t        j                  t        |      D cg c]  }t        |||||||||||
       c}      | _        t        j                  g       | _        t!        t        j"                  ||            | _        t        t'        | j                              D ]>  }t        j"                  ||      }t!        |      }| j                  j)                  |       @ d| _        y c c}w )	Nsincos)heightwidthr;   r.   	embed_dimr>   pos_embed_type)in_featureshidden_sizegh㈵>T)epselementwise_affine)r8   r3   r4   r5   r9   r<   r=   r7   F)super__init__r   r,   r   
time_embedr   caption_projectionr   caption_normr	   
ModuleListranger   transformer_blockscontrolnet_blocksr   Linearinput_blocklenappendgradient_checkpointing)selfr.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   	inner_dim_controlnet_block	__class__s                        r)   rK   zSanaControlNetModel.__init__.   sc   * 	#2{'*<<	 &!# 3':'F8D
 -Y7";HXfo"p#I4DQ #%-- z*  %'&#.G-E(;#1,C%'#
( "$r!2&ryyI'FGs42234 	<A!yyI>*+;<""))*:;	<
 ',#7s   Ehidden_statesencoder_hidden_statestimestepcontrolnet_condconditioning_scaleencoder_attention_maskattention_maskattention_kwargsreturn_dict.c
                 T   |#|j                         }|j                  dd      }
nd}
t        rt        | |
       n)|'|j	                  dd       t
        j                  d       |A|j                  dk(  r2d|j                  |j                        z
  dz  }|j                  d      }|A|j                  dk(  r2d|j                  |j                        z
  dz  }|j                  d      }|j                  \  }}}}| j                  j                  }||z  ||z  }}| j                  |      }|| j                  | j                  |j                  |j                                    z   }| j!                  |||j                        \  }}| j#                  |      }|j%                  |d|j                  d         }| j'                  |      }d	}t)        j*                         r<| j,                  r0| j.                  D ]   }| j1                  ||||||||      }||fz   }" n%| j.                  D ]  } ||||||||      }||fz   } d	}t3        || j4                        D ]  \  }} ||      }||fz   } t        rt7        | |
       |D cg c]  }||z  	 }}|	s|fS t9        |
      S c c}w )Nscale      ?zVPassing `scale` via `attention_kwargs` when not using the PEFT backend is ineffective.r   r   g     )
batch_sizehidden_dtyper'   )r    )copypopr   r   getloggerwarningndimtodtype	unsqueezeshapeconfigr;   r,   rT   rL   rM   viewrN   r$   is_grad_enabledrW   rQ   _gradient_checkpointing_funcziprR   r   r   )rX   r]   r^   r_   r`   ra   rb   rc   rd   re   
lora_scaleri   num_channelsrB   rC   ppost_patch_heightpost_patch_widthembedded_timestepblock_res_samplesblockcontrolnet_block_res_samplesblock_res_sampler[   samples                            r)   forwardzSanaControlNetModel.forwardx   s"    '/446)--gs;JJdJ/+0@0D0DWd0S0_l %.*=*=*B
  ."3"3M4G4G"HHHTN+55a8N "-2H2M2MQR2R&'*@*C*CMDWDW*X&X\d%d"%;%E%Ea%H" 3@2E2E/
L&%KK"".4k5A:+((7%(8(89I9I/J\J\]j]p]pJq9r(ss&*oo-:M:M '6 '
## !% 7 78M N 5 : ::r=K^K^_aKb c $ 1 12G H   "t'B'B00 I $ A A!")*%$	! %68H$H!I 00 
I %!")*%$! %68H$H!
I (*$256GI_I_2` 	^../0@A+GK[J]+](	^ j1Rn'o1C(C'o$'o022#=YZZ (ps   J%)    r   F   r         p   i  i 	  g      @g        Fr   r   Fgư>N)rh   NNNT)r!   r"   r#    _supports_gradient_checkpointing_no_split_modules _skip_layerwise_casting_patternsr   intr   floatboolrK   r$   r%   
LongTensorr   strr   r   r   r   r   __classcell__)r\   s   @r)   r+   r+   )   s   '+$/>(5v'>$ &(#%"$3525-1 $$(--1%G,G, smG, !	G,
  G, G, $,C=G, #+3-G, &c]G, G, G, G, G, G, G,  "&!G," #G,$ &c]%G,& 
'G, G,^ %(9=1559 n[||n[  %||n[ ""	n[
 n[ "n[ !) 6n[ !.n[ #4S>2n[ n[ 
uU\\3&')AA	Bn[r(   r+   )*dataclassesr   typingr   r   r   r   r   r$   r	   configuration_utilsr   r   loadersr   utilsr   r   r   r   r   	attentionr   
embeddingsr   r   modeling_outputsr   modeling_utilsr   normalizationr   r   transformers.sana_transformerr   
controlnetr   
get_loggerr!   ro   r   r+   r'   r(   r)   <module>r      s}    " 4 4   B ' b b & > 7 ' 7 @ # 
		H	% 2: 2 2}[*nkCS }[r(   