
    ikF                     :   d dl mZ d dlmZmZ d dlZd dlmZ ddlmZm	Z	 ddl
mZmZ dd	lmZ dd
lmZmZ ddlmZmZ ddlmZ  ej.                  e      Ze G d de             Z G d dej6                        Z G d deee      Z G d dej6                        Z G d dej6                        Z G d dej6                        Z  G d dej6                        Z! G d dej6                        Z" G d dej6                        Z# G d d ej6                        Z$y)!    )	dataclass)TupleUnionN)nn   )ConfigMixinregister_to_config)
BaseOutputlogging   )AttentionMixin)	AttentionAttnProcessor)TimestepEmbedding	Timesteps)
ModelMixinc                   0    e Zd ZU dZej
                  ed<   y)Kandinsky3UNetOutputNsample)__name__
__module____qualname__r   torchTensor__annotations__     p/home/obispo/Crisostomo_bridge/mision_env/lib/python3.12/site-packages/diffusers/models/unets/unet_kandinsky3.pyr   r       s    FELLr   r   c                   $     e Zd Z fdZd Z xZS )Kandinsky3EncoderProjc                     t         |           t        j                  ||d      | _        t        j
                  |      | _        y )NF)bias)super__init__r   Linearprojection_linear	LayerNormprojection_norm)selfencoder_hid_dimcross_attention_dim	__class__s      r   r$   zKandinsky3EncoderProj.__init__&   s7    !#?<OV[!\!||,?@r   c                 J    | j                  |      }| j                  |      }|S N)r&   r(   )r)   xs     r   forwardzKandinsky3EncoderProj.forward+   s'    ""1%  #r   r   r   r   r$   r0   __classcell__r,   s   @r   r    r    %   s    A
r   r    c                        e Zd Ze	 	 	 	 	 	 	 	 ddededededeeee   f   deedf   deeee   f   d	ef fd
       Zd ZddZ	 xZ
S )Kandinsky3UNetin_channelstime_embedding_dimgroupsattention_head_dimlayers_per_blockblock_out_channels.r+   r*   c	                     t         |           d}	d}
d}d}|}|d   dz  }t        |dd      | _        t	        ||      | _        t        |||      | _        t        j                  ||dd	      | _
        t        ||      | _        |gt        |      z   }t        t        |d d
 |dd              }|D cg c]  }|r|nd 
 }}t        |      |gz  }|||g}t!        t"        |      }g }t        |      | _        t        j&                  g       | _        t+        t        |g|       D ]r  \  }\  \  }}}}}|| j$                  dz
  k7  }|j-                  || j$                  dz
  k7  r|nd       | j(                  j-                  t/        ||||||||	|
||             t t        j&                  g       | _        t+        t        t#        |      g|       D ]O  \  }\  \  }}}}}|dk7  }| j0                  j-                  t3        ||j5                         |||||||	|
||             Q t        j6                  ||      | _        t        j:                         | _        t        j                  ||dd	      | _        y c c}w )N   r   )FTTTr   F   )flip_sin_to_cosdownscale_freq_shiftr   kernel_sizepadding) r#   r$   r   	time_projr   time_embeddingKandinsky3AttentionPoolingadd_time_conditionr   Conv2dconv_inr    encoder_hid_projlistziplenmapreversed
num_levels
ModuleListdown_blocks	enumerateappendKandinsky3DownSampleBlock	up_blocksKandinsky3UpSampleBlockpop	GroupNormconv_norm_outSiLUconv_act_outconv_out) r)   r6   r7   r8   r9   r:   r;   r+   r*   expansion_ratiocompression_ratioadd_cross_attentionadd_self_attentionout_channelsinit_channelshidden_dimsin_out_dimsis_exist	text_dims
num_blockslayer_paramsrev_layer_paramscat_dimslevelin_dimout_dimres_block_numtext_dimself_attentiondown_sample	up_sampler,   s                                   r   r$   zKandinsky3UNet.__init__2   s    	 76"*1-2"=%^_`/

 #= 35G#
 yymTUV 5oGZ [$o-?(@@3{3B/QRABObc8H($>c	c+,0@/AA
"I/ABx6k*==,S\+l+T
 	OEO%VW}h  DOOa$78KOOu11D'EG1M##)&!&#%"	* r*S\%9(89T
 	OEO%Wf}h 
INN!!'LLN&!&#%"		*  \\&-@GGI		-1VWXk ds   5I;c                 6    | j                  t                      y)ze
        Disables custom attention processors and sets the default attention implementation.
        N)set_attn_processorr   )r)   s    r   set_default_attn_processorz)Kandinsky3UNet.set_default_attn_processor   s     	0r   c                    |2d|j                  |j                        z
  dz  }|j                  d      }t        j                  |      sTt        |t              rt        j                  nt        j                  }t        j                  |g||j                        }n6t        |j                        dk(  r|d    j                  |j                        }|j                  |j                  d         }| j                  |      j                  |j                        }| j                  |      }| j!                  |      }|| j#                  |||      }g }	| j%                  |      }t'        | j(                        D ]4  \  }
} |||||      }|
| j*                  dz
  k7  s$|	j-                  |       6 t'        | j.                        D ]<  \  }
}|
dk7  r't        j0                  ||	j3                         gd      } |||||      }> | j5                  |      }| j7                  |      }| j9                  |      }|s|fS t;        |      S )Nr>   g     )dtypedevicer   dim)r   )tory   	unsqueezer   	is_tensor
isinstancefloatfloat32int32tensorrz   rN   shapeexpandrE   rF   rK   rH   rJ   rT   rS   rQ   rU   rW   catrY   r[   r]   r^   r   )r)   r   timestepencoder_hidden_statesencoder_attention_maskreturn_dictry   time_embed_input
time_embedhidden_statesrm   rs   rt   s                r   r0   zKandinsky3UNet.forward   s   !-&'*@*C*CFLL*Q&QU]%]"%;%E%Ea%H"x(%/%%@EMMekkE||XJeFMMRH A%~((7H ??6<<?3>>(366v||D(()9:
 $ 5 56K L ,00=RTjkJf%"+D,<,<"= 	-E; 5JLbcF!++$$V,	-
 !*$.. 9 	bE9zFM,=,=,?#@aHvz3HJ`aF	b
 ##F+""6*v&9#622r   )r=          @   r   )i  i   r   i      r   )NNT)r   r   r   r	   intr   r   r$   rw   r0   r2   r3   s   @r   r5   r5   1   s     "&"$34.D6:#[Y[Y  [Y 	[Y
  [Y  U3Z0[Y "#s(O[Y #3c
?3[Y [Y [Yz1'3r   r5   c                   8     e Zd Z	 	 	 	 	 	 	 	 d fd	ZddZ xZS )rX   c                    t         |           d |rdnd d d ggd gdz  g|dz
  z  z   }||z   |fg||fg|dz
  z  z   ||fgz   }g }g }g }|| _        || _        |r |j	                  t        ||d |||	             n#|j	                  t        j                                t        ||      D ]  \  \  }}}|j	                  t        |||||
|             | |j	                  t        ||||||	             n#|j	                  t        j                                |j	                  t        |||||
              t        j                  |      | _        t        j                  |      | _        t        j                  |      | _        y )NTr=   r>   r   r#   r$   rr   context_dimrU   Kandinsky3AttentionBlockr   IdentityrM   Kandinsky3ResNetBlockrR   
attentions
resnets_inresnets_out)r)   r6   cat_dimrc   time_embed_dimr   ri   r8   head_dimr_   r`   rt   rr   up_resolutionshidden_channelsr   r   r   
in_channelout_channelup_resolutionr,   s                        r   r$   z Kandinsky3UpSampleBlock.__init__   s    	dDIJtfWXj\]gjk]kMllG#[12[)*j1n=>\*+, 	
 

,&(~tVU]_no bkkm,8;O^8\ 	4%Z}%j*nfVgivw &!!,"NKSb !!"++-0%j+~vWhi	$ --
3--
3==5r   c                    t        | j                  dd  | j                  | j                        D ]0  \  }}} |||      }| j                   ||||||      } |||      }2 | j
                  r | j                  d   |||      }|S )Nr>   r   
image_mask)rM   r   r   r   r   rr   	r)   r/   r   contextcontext_maskr   	attention	resnet_in
resnet_outs	            r   r0   zKandinsky3UpSampleBlock.forward   s    03DOOAB4GZ^ZjZj0k 	*,Iy*!Z(A+aWlJO1j)A		* ""1jZHAr   Nr   r   r   r=   r   TTNNNr1   r3   s   @r   rX   rX      s(     86t	r   rX   c                   8     e Zd Z	 	 	 	 	 	 	 	 d fd	ZddZ xZS )rV   c                    t         |           g }g }g }|| _        || _        |r |j	                  t        ||d |||             n#|j	                  t        j                                d gdz  g|dz
  z  d d |
rdnd d ggz   }||fg||fg|dz
  z  z   }t        ||      D ]  \  \  }}}|j	                  t        |||||	             | |j	                  t        ||||||             n#|j	                  t        j                                |j	                  t        |||||	|              t        j                  |      | _        t        j                  |      | _        t        j                  |      | _        y )Nr=   r>   Fr   )r)   r6   rc   r   r   ri   r8   r   r_   r`   rs   rr   r   r   r   r   r   r   r   r   r,   s                       r   r$   z"Kandinsky3DownSampleBlock.__init__  s    	

,&(ndFT\^mn bkkm,&1*a8T4R]cgim<n;oo'67L,;W:X\fij\j:kk8;O^8\ 	4%Z}%j+~vWhi &!!,#^[&(Tc !!"++-0%nfFWYf	( --
3--
3==5r   c                    | j                   r | j                  d   |||      }t        | j                  dd  | j                  | j                        D ]0  \  }}} |||      }| j
                   ||||||      } |||      }2 |S )Nr   r   r>   )rr   r   rM   r   r   r   r   s	            r   r0   z!Kandinsky3DownSampleBlock.forward?  s    ""1jZHA03DOOAB4GZ^ZjZj0k 	*,Iy*!Z(A+aWlJO1j)A		*
 r   r   r   r1   r3   s   @r   rV   rV     s(     56n	r   rV   c                   $     e Zd Z fdZd Z xZS )Kandinsky3ConditionalGroupNormc                    t         |           t        j                  ||d      | _        t        j
                  t        j                         t        j                  |d|z              | _        | j                  d   j                  j                  j                          | j                  d   j                  j                  j                          y )NF)affiner   r>   )r#   r$   r   rZ   norm
Sequentialr\   r%   context_mlpweightdatazero_r"   )r)   r8   normalized_shaper   r,   s       r   r$   z'Kandinsky3ConditionalGroupNorm.__init__L  s    LL)9%H	==BIIk1O_K_4`a""''--/  %%++-r   c                     | j                  |      }t        t        |j                  dd              D ]  }|j	                  d      } |j                  dd      \  }}| j                  |      |dz   z  |z   }|S )Nr   rD   r>   r{   g      ?)r   rangerN   r   r~   chunkr   )r)   r/   r   _scaleshifts         r   r0   z&Kandinsky3ConditionalGroupNorm.forwardS  s}    ""7+s17712;'( 	,A''+G	, }}QA}.uIIaLECK(50r   r1   r3   s   @r   r   r   K  s    .r   r   c                   &     e Zd Zd fd	Zd Z xZS )Kandinsky3Blockc                    t         |           t        |||      | _        t	        j
                         | _        |!|rt	        j                  ||dd      | _        nt	        j                         | _        t        |dkD        }t	        j                  ||||      | _        |!|st	        j                  ||dd      | _        y t	        j                         | _        y )Nr   rB   strider>   rA   )r#   r$   r   
group_normr   r\   
activationConvTranspose2drt   r   r   rI   
projectionrs   )	r)   r6   rc   r   rB   norm_groupsr   rC   r,   s	           r   r$   zKandinsky3Block.__init___  s    8kSab'')$//[VW`abDN[[]DNkAo&))K;`gh$]!yy|QR[\]D!{{}Dr   c                     | j                  ||      }| j                  |      }| j                  |      }| j                  |      }| j	                  |      }|S r.   )r   r   rt   r   rs   )r)   r/   r   s      r   r0   zKandinsky3Block.forwardp  sQ    OOAz*OOANN1OOAQr   )r   r   Nr1   r3   s   @r   r   r   ^  s    -"r   r   c                   4     e Zd Zddddgz  f fd	Zd Z xZS )r   r   r   r=   Nc                 l   t         |           g d}t        ||      |z  }||fg||fgdz  z   ||fgz   }	t        j                  t        |	||      D 
cg c]  \  \  }
}}}t        |
|||||       c}}}}
      | _        d|v rt        j                  ||dd      nt        j                         | _
        ||k7  rt        j                  ||d      nt        j                         | _        d|v rt        j                  ||dd      | _        y t        j                         | _        y c c}}}}
w )N)r>   r   r   r>   r   Tr   r>   )rB   F)r#   r$   maxr   rR   rM   r   resnet_blocksr   r   shortcut_up_samplerI   shortcut_projectionshortcut_down_sample)r)   r6   rc   r   r   r`   r   kernel_sizeshidden_channelr   r   r   rB   r   r,   s                 r   r$   zKandinsky3ResNetBlock.__init__z  sS    	#[,7;LL>*+/O.PST.TTYgiuXvWww 	  ]] NQ#\>N I-Z{M  
KVacpq
 ~% {KQqQ 	 DOR^C^BIIk<Q?dfdododq 	 
 & IIlLaJ 	!  	!s   D.c                     |}| j                   D ]  } |||      } | j                  |      }| j                  |      }| j                  |      }||z   }|S r.   )r   r   r   r   )r)   r/   r   outresnet_blocks        r   r0   zKandinsky3ResNetBlock.forward  sh     .. 	0LsJ/C	0 ##A&$$Q'%%a(Gr   r1   r3   s   @r   r   r   y  s    EG[\mnrvqwmw
>	r   r   c                   (     e Zd Zd fd	ZddZ xZS )rG   c                 L    t         |           t        ||||d      | _        y )NFdim_headro   out_bias)r#   r$   r   r   )r)   num_channelsr   r   r,   s       r   r$   z#Kandinsky3AttentionPooling.__init__  s)    " 
r   c                     |j                  |j                        }| j                  |j                  dd      ||      }||j	                  d      z   S )Nry   r>   T)r|   keepdim)r}   ry   r   meansqueeze)r)   r/   r   r   s       r   r0   z"Kandinsky3AttentionPooling.forward  sI    #W]];..!T!BG\Z7??1%%%r   )r   r.   r1   r3   s   @r   rG   rG     s    
&r   rG   c                   (     e Zd Zd fd	ZddZ xZS )r   c           
      ^   t         |           t        |||      | _        t	        ||xs |||d      | _        ||z  }t        |||      | _        t        j                  t        j                  ||dd      t        j                         t        j                  ||dd            | _        y )NFr   r>   )rB   r"   )r#   r$   r   in_normr   r   out_normr   r   rI   r\   feed_forward)	r)   r   r   r   r   r   r_   r   r,   s	           r   r$   z!Kandinsky3AttentionBlock.__init__  s    5k<Q_`"'< 
 *L86{LR`aMMIIlOOGGIIIo|O
r   c                     |j                   dd  \  }}| j                  ||      }|j                  |j                   d   d||z        j                  ddd      }||n|}||j	                  |j
                        }| j                  |||      }|j                  ddd      j                  d      j                  |j                   d   d||      }||z   }| j                  ||      }| j                  |      }||z   }|S )Nr   rD   r   r>   r   )
r   r   reshapepermuter}   ry   r   r~   r   r   )	r)   r/   r   r   r   r   heightwidthr   s	            r   r0   z Kandinsky3AttentionBlock.forward  s    ll1j)kk!''!*b&5.9AA!QJ$0'c#'????LnnS'<8kk!Q",,R0881r6SXYGmmAz*$Gr   )Nr   r   r=   r   r1   r3   s   @r   r   r     s    
&r   r   )%dataclassesr   typingr   r   r   r   configuration_utilsr   r	   utilsr
   r   r   r   attention_processorr   r   
embeddingsr   r   modeling_utilsr   
get_loggerr   loggerr   Moduler    r5   rX   rV   r   r   r   rG   r   r   r   r   <module>r      s    "    B ( & : 5 ' 
		H	%  :    	BII 	L3Z L3^Dbii DNA		 AHRYY &bii 6)BII )X& &"#ryy #r   