
    ij8                     ~   d dl Z d dlmc mZ d dl mZ d dlmZ ddlmZm	Z	 ddl
mZ ddlmZmZmZ dd	lmZmZmZmZ dd
lmZmZ ddlmZ ddlmZmZ ddlmZmZ  G d deeee      Z  G d dejB                        Z" G d dejB                        Z# G d dejB                        Z$ G d dejB                        Z%y)    N)nn
checkpoint   )ConfigMixinregister_to_config)PeftAdapterMixin   )AttentionMixinBasicTransformerBlockSkipFFTransformerBlock)ADDED_KV_ATTENTION_PROCESSORSCROSS_ATTENTION_PROCESSORSAttnAddedKVProcessorAttnProcessor)TimestepEmbeddingget_timestep_embedding)
ModelMixin)GlobalResponseNormRMSNorm)Downsample2D
Upsample2Dc            .            e Zd ZdZe	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddededededededed	ed
edededededededededededededef, fd       ZddZ	d Z
 xZS )UVit2DModelThidden_sizeuse_biashidden_dropoutcond_embed_dimmicro_cond_encode_dimmicro_cond_embed_dimencoder_hidden_size
vocab_sizecodebook_sizein_channelsblock_out_channelsnum_res_blocks
downsampleupsampleblock_num_headsnum_hidden_layersnum_attention_headsattention_dropoutintermediate_sizelayer_norm_epsln_elementwise_affinesample_sizec                    t         |           t        j                  |||      | _        t        |||      | _        t        |
|||||      | _        t        ||z   ||      | _
        t        ||||||||||d      | _        t        |||      | _        t        j                  |||      | _        t        j                  t!        |      D cg c]  }t#        ||||z  |||d|||||||        c}      | _        t        |||      | _        t        j                  |||      | _        t        |||||||||d|      | _        t-        ||
||||	      | _        d| _        y c c}w )Nbias)sample_proj_biasFada_norm_continuous)dimr+   attention_head_dimdropoutcross_attention_dimattention_bias	norm_type-ada_norm_continous_conditioning_embedding_dimnorm_elementwise_affinenorm_epsada_norm_biasff_inner_dimff_biasattention_out_bias)r'   r(   )super__init__r   Linearencoder_projr   encoder_proj_layer_normUVit2DConvEmbedembedr   
cond_embed	UVitBlock
down_blockproject_to_hidden_normproject_to_hidden
ModuleListranger   transformer_layersproject_from_hidden_normproject_from_hiddenup_blockConvMlmLayer	mlm_layergradient_checkpointing)selfr   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   _	__class__s                           h/home/obispo/Crisostomo_bridge/mision_env/lib/python3.12/site-packages/diffusers/models/unets/uvit_2d.pyrD   zUVit2DModel.__init__(   s   D 	II&9;XV'.{NLa'b$$+Z9NP^`h

 , >1;QY
 $!
 '..@.Rg&h#!#+={QY!Z"$--$ 01#" ! &#(;'26I'I*(3#+3BM,A+"*!2$'/#
, )0^Mb(c%#%99[:LS[#\ !!
 &X7Ln^k
 ',#Us   #E%c                    | j                  |      }| j                  |      }t        |j                         | j                  j
                  dd      }|j                  |j                  d   df      }t        j                  ||gd      }|j                  | j                        }| j                  |      j                  |j                        }| j                  |      }| j                  ||||      }|j                  \  }}	}
}|j                  dd	d
d      j                  ||
|z  |	      }| j!                  |      }| j#                  |      }| j$                  D ]8  t        j&                         r| j(                  rfd}n} ||||d|i      }: | j+                  |      }| j-                  |      }|j                  ||
||	      j                  dd
dd	      }| j/                  ||||      }| j1                  |      }|S )NTr   )flip_sin_to_cosdownscale_freq_shift   r6   )dtype)pooled_text_embencoder_hidden_statescross_attention_kwargsr
   r   c                      t        g|  S Nr   )argslayers    r[   layer_z#UVit2DModel.forward.<locals>.layer_   s    %e3d33    rc   )rd   re   added_cond_kwargs)rF   rG   r   flattenconfigr   reshapeshapetorchcattorb   rJ   rI   rL   permuterM   rN   rQ   is_grad_enabledrW   rR   rS   rT   rV   )rX   	input_idsrd   rc   micro_condsre   micro_cond_embedshidden_states
batch_sizechannelsheightwidthrj   logitsri   s                 @r[   forwardzUVit2DModel.forward   s    $ 1 12G H $ < <=R S2!4;;#D#DVZqr
 .55yq7I26NO))_6G$HaP),,4::,>///:==>S>Y>YZ

9-+"7#9	 ( 
 /<.A.A+
Hfe%--aAq9AA*fW\n^fg33MB..}=,, 	E$$&4+F+F4 "&;'=#4o"F	M	  55mD00?%--j&%RZZ[\^_abdef+"7#9	 & 
 .rk   c           	      j   t        d | j                  j                         D              rt               }nmt        d | j                  j                         D              rt	               }n8t        dt        t        | j                  j                                            | j                  |       y)ze
        Disables custom attention processors and sets the default attention implementation.
        c              3   @   K   | ]  }|j                   t        v   y wrg   )rZ   r   .0procs     r[   	<genexpr>z9UVit2DModel.set_default_attn_processor.<locals>.<genexpr>   s     i4t~~!>>i   c              3   @   K   | ]  }|j                   t        v   y wrg   )rZ   r   r   s     r[   r   z9UVit2DModel.set_default_attn_processor.<locals>.<genexpr>   s     h$#==hr   zOCannot call `set_default_attn_processor` when attention processors are of type N)	allattn_processorsvaluesr   r   
ValueErrornextiterset_attn_processor)rX   	processors     r[   set_default_attn_processorz&UVit2DModel.set_default_attn_processor   s     i4K_K_KfKfKhii,.Ih$J^J^JeJeJghh%Iabfgklp  mA  mA  mH  mH  mJ  hK  cL  bM  N  		*rk   )i   F              i   r   i@   i    r   r   r   FF         r   i   gư>T@   rg   )__name__
__module____qualname__ _supports_gradient_checkpointingr   intboolfloatrD   r   r   __classcell__rZ   s   @r[   r   r   %   s_   '+$   #!%($(#&!"% !!##%#&!% $&*?j, j, 	j,
 j, j,  #j, "j, !j, j, j, j,   !j," #j,$ %j,& 'j,( )j,, -j,. !/j,2 !3j,6 7j,: ;j,<  $=j,> ?j, j,X;|+rk   r   c                   $     e Zd Z fdZd Z xZS )rH   c                     t         |           t        j                  ||      | _        t        |||      | _        t        j                  ||d|      | _        y Nr`   )kernel_sizer3   )	rC   rD   r   	Embedding
embeddingsr   
layer_normConv2dconv)rX   r$   r%   r"   elementwise_affineepsr3   rZ   s          r[   rD   zUVit2DConvEmbed.__init__   sI    ,,z;?!+s4FGIIk+=1SWX	rk   c                     | j                  |      }| j                  |      }|j                  dddd      }| j                  |      }|S )Nr   r   r`   r
   )r   r   rt   r   )rX   rv   r   s      r[   r   zUVit2DConvEmbed.forward   sH    __Y/
__Z0
''1a3
YYz*
rk   r   r   r   rD   r   r   r   s   @r[   rH   rH      s    Yrk   rH   c                   2     e Zd Zdededef fdZd Z xZS )rK   r&   r'   r(   c                    t         |           |
rt        |ddddd|||	      | _        nd | _        t	        j
                  t        |      D cg c]  }t        ||||||       c}      | _        t	        j
                  t        |      D cg c]  }t        ||||z  |||	|||	       c}      | _
        |rt        |ddddd|||d	

      | _        y d | _        y c c}w c c}w )NTr   Conv2d_0r
   rms_norm)use_convpaddingnamer   r;   r   r   r3   )r:   rB   r   F)	use_conv_transposer   r   r   r;   r   r   r3   interpolate)rC   rD   r   r'   r   rO   rP   ConvNextBlock
res_blocksr   attention_blocksr   r(   )rX   r{   r&   r   r   r/   r.   r   r)   r,   r'   r(   irY   rZ   s                 r[   rD   zUVitBlock.__init__   s#    	*$"#8
DO #DO-- ~.
  ")"

 !# ~.  '#/%#+'/
!
" &#'$"#8!DM !DMY
s   CC"c                    | j                   | j                  |      }t        | j                  | j                        D ]v  \  }} |||      }|j                  \  }}}	}
|j                  |||	|
z        j                  ddd      } ||||      }|j                  ddd      j                  |||	|
      }x | j                  | j                  |      }|S )Nr   r
   r`   )rd   re   )r'   zipr   r   rp   viewrt   r(   )rX   xrc   rd   re   	res_blockattention_blockrz   r{   r|   r}   s              r[   r   zUVitBlock.forwardA  s    ??&"A*-doot?T?T*U 	M&I!_-A23''/J&%z8Ve^<DDQ1MA)>WmA 		!Q"''
HfeLA	M ==$a Ark   )r   r   r   r   r   rD   r   r   r   s   @r[   rK   rK      s0    L! L! L! L!\rk   rK   c                   (     e Zd Z	 d fd	Zd Z xZS )r   c                    t         |           t        j                  ||dd||      | _        t        |||      | _        t        j                  |t        ||z        |      | _	        t        j                         | _        t        t        ||z              | _        t        j                  t        ||z        ||      | _        t        j                  |      | _        t        j                  ||dz  |      | _        y )Nr   r`   )r   r   groupsr3   r2   r
   )rC   rD   r   r   	depthwiser   normrE   r   channelwise_linear_1GELUchannelwise_actr   channelwise_normchannelwise_linear_2Dropoutchannelwise_dropoutcond_embeds_mapper)	rX   r{   r.   r/   r   r   r   res_ffn_factorrZ   s	           r[   rD   zConvNextBlock.__init__V  s     	
 Hn6KL	$&IIhH~<U8V]e$f!!wwy 23x.7P3Q R$&IIc(^2K.Lh]e$f!#%::n#= "$))KAx"Prk   c                    |}| j                  |      }|j                  dddd      }| j                  |      }| j                  |      }| j	                  |      }| j                  |      }| j                  |      }| j                  |      }|j                  dddd      }||z   }| j                  t        j                  |            j                  dd      \  }}|d|d d d d d d f   z   z  |d d d d d d f   z   }|S )Nr   r
   r   r`   ra   )r   rt   r   r   r   r   r   r   r   Fsiluchunk)rX   r   cond_embedsx_resscaleshifts         r[   r   zConvNextBlock.forwardj  s   NN1IIaAq!IIaL%%a(  #!!!$%%a($$Q'IIaAq!I..qvvk/BCII!QRISuU1at+,,-aD$6F0GGrk   )   r   r   s   @r[   r   r   U  s    uvQ(rk   r   c                   >     e Zd Zdedededededef fdZd Z xZS )	rU   r%   r$   r   r/   r.   r#   c                     t         |           t        j                  ||d|      | _        t        |||      | _        t        j                  ||d|      | _        y r   )rC   rD   r   r   conv1r   r   conv2)rX   r%   r$   r   r/   r.   r#   rZ   s          r[   rD   zConvMlmLayer.__init__  sO     	YY1;AT\]
!+~?TUYY{MqxX
rk   c                     | j                  |      }| j                  |j                  dddd            j                  dddd      }| j                  |      }|S )Nr   r
   r   r`   )r   r   rt   r   )rX   ry   r~   s      r[   r   zConvMlmLayer.forward  sW    

=1(=(=aAq(IJRRSTVWYZ\]^M*rk   )	r   r   r   r   r   r   rD   r   r   r   s   @r[   rU   rU     sN    YY Y 	Y
  $Y Y Yrk   rU   )&rq   torch.nn.functionalr   
functionalr   torch.utils.checkpointr   configuration_utilsr   r   loadersr	   	attentionr   r   r   attention_processorr   r   r   r   r   r   r   modeling_utilsr   normalizationr   r   resnetr   r   r   ModulerH   rK   r   rU    rk   r[   <module>r      s   "     - B ' U U  C ' 7 -{+*nk;K {+|bii `		 `F*BII *Z299 rk   