
    i                     T   d dl mZ d dlmZmZ d dlZd dlZd dlm	Z	 ddl
mZ ddlmZ ddlmZ dd	lmZ dd
lmZmZmZmZ e G d de             Ze G d de             Z G d de	j2                        Z G d de	j2                        Z G d de	j2                        Z G d de	j2                        Z G d de	j2                        Z G d de	j2                        Z G d de       Z! G d de       Z" G d d e	j2                        Z# G d! d"e	j2                        Z$ G d# d$      Z%y)%    )	dataclass)OptionalTupleN   )
BaseOutput)randn_tensor   )get_activation)SpatialNorm)AutoencoderTinyBlockUNetMidBlock2Dget_down_blockget_up_blockc                   0    e Zd ZU dZej
                  ed<   y)EncoderOutputz
    Output of encoding method.

    Args:
        latent (`torch.Tensor` of shape `(batch_size, num_channels, latent_height, latent_width)`):
            The encoded latent.
    latentN)__name__
__module____qualname____doc__torchTensor__annotations__     k/home/obispo/Crisostomo_bridge/mision_env/lib/python3.12/site-packages/diffusers/models/autoencoders/vae.pyr   r   !   s     LLr   r   c                   X    e Zd ZU dZej
                  ed<   dZeej                     ed<   y)DecoderOutputz
    Output of decoding method.

    Args:
        sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)`):
            The decoded output sample from the last layer of the model.
    sampleNcommit_loss)
r   r   r   r   r   r   r   r    r   FloatTensorr   r   r   r   r   .   s(     LL/3K%++,3r   r   c                        e Zd ZdZ	 	 	 	 	 	 	 	 	 ddededeedf   deedf   deded	ed
ef fdZde	j                  de	j                  fdZ xZS )Encodera  
    The `Encoder` layer of a variational autoencoder that encodes its input into a latent representation.

    Args:
        in_channels (`int`, *optional*, defaults to 3):
            The number of input channels.
        out_channels (`int`, *optional*, defaults to 3):
            The number of output channels.
        down_block_types (`Tuple[str, ...]`, *optional*, defaults to `("DownEncoderBlock2D",)`):
            The types of down blocks to use. See `~diffusers.models.unet_2d_blocks.get_down_block` for available
            options.
        block_out_channels (`Tuple[int, ...]`, *optional*, defaults to `(64,)`):
            The number of output channels for each block.
        layers_per_block (`int`, *optional*, defaults to 2):
            The number of layers per block.
        norm_num_groups (`int`, *optional*, defaults to 32):
            The number of groups for normalization.
        act_fn (`str`, *optional*, defaults to `"silu"`):
            The activation function to use. See `~diffusers.models.activations.get_activation` for available options.
        double_z (`bool`, *optional*, defaults to `True`):
            Whether to double the number of output channels for the last block.
    in_channelsout_channelsdown_block_types.block_out_channelslayers_per_blocknorm_num_groupsact_fndouble_zc
                    t         |           || _        t        j                  ||d   ddd      | _        t        j                  g       | _        |d   }
t        |      D ]Y  \  }}|
}||   }
|t        |      dz
  k(  }t        || j                  ||
| dd|||
d       }| j                  j                  |       [ t        |d   d|dd|d   |d |			      | _        t        j                  |d   |d
      | _        t        j                          | _        |rd|z  n|}t        j                  |d   |dd      | _        d| _        y )Nr   r      kernel_sizestridepaddingư>)

num_layersr$   r%   add_downsample
resnet_epsdownsample_paddingresnet_act_fnresnet_groupsattention_head_dimtemb_channelsdefault	r$   r5   r7   output_scale_factorresnet_time_scale_shiftr9   r8   r:   add_attentionnum_channels
num_groupsepsr	   r1   F)super__init__r(   nnConv2dconv_in
ModuleListdown_blocks	enumeratelenr   appendr   	mid_block	GroupNormconv_norm_outSiLUconv_actconv_outgradient_checkpointing)selfr$   r%   r&   r'   r(   r)   r*   r+   mid_block_add_attentionoutput_channelidown_block_typeinput_channelis_final_block
down_blockconv_out_channels	__class__s                    r   rG   zEncoder.__init__T   sw    	 0yyq!
 ==, ,A."+,<"= 	0A*M/2N#&8"9A"==N'00)+#11#$$-#1"J ##J/%	0* (*2.  !$-1"5)1

  \\7I"7MZiost	08A,l		"4R"8:KQXYZ&+#r   r   returnc                    | j                  |      }t        j                         rL| j                  r@| j                  D ]  }| j                  ||      } | j                  | j                  |      }n*| j                  D ]
  } ||      } | j                  |      }| j                  |      }| j                  |      }| j                  |      }|S )z*The forward method of the `Encoder` class.)
rJ   r   is_grad_enabledrV   rL   _gradient_checkpointing_funcrP   rR   rT   rU   )rW   r   r^   s      r   forwardzEncoder.forward   s     f%  "t'B'B".. O
:::vNO 66t~~vNF #.. ,
#F+, ^^F+F ##F+v&v&r   )	r   r   )DownEncoderBlock2D@   r	       siluTT)r   r   r   r   intr   strboolrG   r   r   re   __classcell__r`   s   @r   r#   r#   <   s    2 ,C.3 !! $C,C, C,  S/	C,
 "#s(OC, C, C, C, C,Jell u|| r   r#   c                        e Zd ZdZ	 	 	 	 	 	 	 	 	 ddededeedf   deedf   deded	ed
ef fdZ	 ddej                  de
ej                     dej                  fdZ xZS )Decodera  
    The `Decoder` layer of a variational autoencoder that decodes its latent representation into an output sample.

    Args:
        in_channels (`int`, *optional*, defaults to 3):
            The number of input channels.
        out_channels (`int`, *optional*, defaults to 3):
            The number of output channels.
        up_block_types (`Tuple[str, ...]`, *optional*, defaults to `("UpDecoderBlock2D",)`):
            The types of up blocks to use. See `~diffusers.models.unet_2d_blocks.get_up_block` for available options.
        block_out_channels (`Tuple[int, ...]`, *optional*, defaults to `(64,)`):
            The number of output channels for each block.
        layers_per_block (`int`, *optional*, defaults to 2):
            The number of layers per block.
        norm_num_groups (`int`, *optional*, defaults to 32):
            The number of groups for normalization.
        act_fn (`str`, *optional*, defaults to `"silu"`):
            The activation function to use. See `~diffusers.models.activations.get_activation` for available options.
        norm_type (`str`, *optional*, defaults to `"group"`):
            The normalization type to use. Can be either `"group"` or `"spatial"`.
    r$   r%   up_block_types.r'   r(   r)   r*   	norm_typec
                    t         |           || _        t        j                  ||d   ddd      | _        t        j                  g       | _        |dk(  r|nd }
t        |d   d|d|dk(  rdn||d   ||
|			      | _	        t        t        |            }|d
   }t        |      D ]_  \  }}|}||   }|t        |      dz
  k(  }t        || j                  dz   |||| d||||
|      }| j                  j                  |       |}a |dk(  rt!        |d
   |
      | _        n t        j$                  |d
   |d      | _        t        j&                         | _        t        j                  |d
   |dd      | _        d| _        y )Nr;   r   r-   r.   spatialr2   groupr<   r=   r   r3   r$   r%   prev_output_channeladd_upsampler5   r7   r8   r9   r:   r?   rA   rE   F)rF   rG   r(   rH   rI   rJ   rK   	up_blocksr   rP   listreversedrM   rN   r   rO   r   rR   rQ   rS   rT   rU   rV   )rW   r$   r%   rr   r'   r(   r)   r*   rs   rX   r:   reversed_block_out_channelsrY   rZ   up_block_typerx   r]   up_blockr`   s                     r   rG   zDecoder.__init__   s    	 0yyr"
 r*'0I'=4 (*2.  !1:g1EI91"5)'1

 '+84F+G&H#4Q7 ). 9 	1A}"08;N#&8"9A"==N#0014/+$7!//$-#1+(1H NN!!(+"0+	10 	!!,-?-BM!RD!#;Ma;P]lrv!wD			"4Q"7qRST&+#r   r   latent_embedsra   c                    | j                  |      }t        j                         rN| j                  rB| j	                  | j
                  ||      }| j                  D ]  }| j	                  |||      } n,| j                  ||      }| j                  D ]  } |||      } || j                  |      }n| j                  ||      }| j                  |      }| j                  |      }|S )z*The forward method of the `Decoder` class.)
rJ   r   rc   rV   rd   rP   rz   rR   rT   rU   )rW   r   r   r   s       r   re   zDecoder.forward  s     f%  "t'B'B66t~~v}]F !NN \::8V][\ ^^FM:F !NN 9!&-89  ''/F''>Fv&v&r   )	r   r   UpDecoderBlock2Drg   r	   ri   rj   rv   TNr   r   r   r   rk   r   rl   rG   r   r   r   re   rn   ro   s   @r   rq   rq      s    0 *?.3 !!  $J,J, J, c3h	J,
 "#s(OJ, J, J, J, J,^ 15    -  
	 r   rq   c                   h     e Zd ZdZdededdf fdZdej                  dej                  fdZ xZ	S )	UpSamplea&  
    The `UpSample` layer of a variational autoencoder that upsamples its input.

    Args:
        in_channels (`int`, *optional*, defaults to 3):
            The number of input channels.
        out_channels (`int`, *optional*, defaults to 3):
            The number of output channels.
    r$   r%   ra   Nc                 |    t         |           || _        || _        t	        j
                  ||ddd      | _        y )N   r	   r-   r.   )rF   rG   r$   r%   rH   ConvTranspose2ddeconv)rW   r$   r%   r`   s      r   rG   zUpSample.__init__F  s=    
 	&(((lPQZ[efgr   xc                 R    t        j                  |      }| j                  |      }|S )z+The forward method of the `UpSample` class.)r   relur   rW   r   s     r   re   zUpSample.forwardP  s!    JJqMKKNr   
r   r   r   r   rk   rG   r   r   re   rn   ro   s   @r   r   r   ;  sH    hh h 
	h %,, r   r   c                   z     e Zd ZdZ	 	 	 ddededededdf
 fdZdd	ej                  dej                  fd
Z xZ	S )MaskConditionEncoderz)
    used in AsymmetricAutoencoderKL
    in_chout_chres_chr0   ra   Nc           
      4   t         |           g }|dkD  r6|dz  }|dz  }||kD  r|}|dk(  r|}|j                  ||f       |dz  }|dkD  r6g }|D ]  \  }}	|j                  |	        |j                  |d   d          g }
|}t        t	        |            D ]f  }||   }|dk(  s|dk(  r*|
j                  t        j                  ||ddd             n)|
j                  t        j                  ||ddd             |}h t        j                  |
 | _        y )Nr-   r	   r;   r   r   r.   r   )	rF   rG   rO   rangerN   rH   rI   
Sequentiallayers)rW   r   r   r   r0   channelsin_ch_r%   _in_ch_out_chr   lout_ch_r`   s                r   rG   zMaskConditionEncoder.__init__\  s3    	qjq[FaZF{OOVV,-aKF qj ' 	)OFG(	)HRLO,s<() 	A"1oGAvabiiQqZ[\]biiQqZ[\]F	 mmV,r   r   c                     i }t        t        | j                              D ]O  }| j                  |   } ||      }||t        t	        |j
                              <   t        j                  |      }Q |S )z7The forward method of the `MaskConditionEncoder` class.)r   rN   r   rl   tupleshaper   r   )rW   r   maskoutr   layers         r   re   zMaskConditionEncoder.forward  se    s4;;'( 	AKKNEaA'(CE!''N#$

1A		
 
r   )   i      r   r   ro   s   @r   r   r   W  sd     #-#- #- 	#-
 #- 
#-J U\\ r   r   c                       e Zd ZdZ	 	 	 	 	 	 	 	 ddededeedf   deedf   deded	ed
ef fdZ	 	 	 ddej                  de
ej                     de
ej                     de
ej                     dej                  f
dZ xZS )MaskConditionDecodera  The `MaskConditionDecoder` should be used in combination with [`AsymmetricAutoencoderKL`] to enhance the model's
    decoder with a conditioner on the mask and masked image.

    Args:
        in_channels (`int`, *optional*, defaults to 3):
            The number of input channels.
        out_channels (`int`, *optional*, defaults to 3):
            The number of output channels.
        up_block_types (`Tuple[str, ...]`, *optional*, defaults to `("UpDecoderBlock2D",)`):
            The types of up blocks to use. See `~diffusers.models.unet_2d_blocks.get_up_block` for available options.
        block_out_channels (`Tuple[int, ...]`, *optional*, defaults to `(64,)`):
            The number of output channels for each block.
        layers_per_block (`int`, *optional*, defaults to 2):
            The number of layers per block.
        norm_num_groups (`int`, *optional*, defaults to 32):
            The number of groups for normalization.
        act_fn (`str`, *optional*, defaults to `"silu"`):
            The activation function to use. See `~diffusers.models.activations.get_activation` for available options.
        norm_type (`str`, *optional*, defaults to `"group"`):
            The normalization type to use. Can be either `"group"` or `"spatial"`.
    r$   r%   rr   .r'   r(   r)   r*   rs   c	                 8   t         |           || _        t        j                  ||d   ddd      | _        t        j                  g       | _        |dk(  r|nd }	t        |d   d|d|dk(  rdn||d   ||		      | _	        t        t        |            }
|
d
   }t        |      D ]_  \  }}|}|
|   }|t        |      dz
  k(  }t        || j                  dz   ||d | d||||	|      }| j                  j                  |       |}a t!        ||d
   |d         | _        |dk(  rt%        |d
   |	      | _        n t        j(                  |d
   |d      | _        t        j*                         | _        t        j                  |d
   |dd      | _        d| _        y )Nr;   r   r-   r.   ru   r2   rv   r<   )r$   r5   r7   r>   r?   r9   r8   r:   r   rw   )r   r   r   rA   rE   F)rF   rG   r(   rH   rI   rJ   rK   rz   r   rP   r{   r|   rM   rN   r   rO   r   condition_encoderr   rR   rQ   rS   rT   rU   rV   )rW   r$   r%   rr   r'   r(   r)   r*   rs   r:   r}   rY   rZ   r~   rx   r]   r   r`   s                    r   rG   zMaskConditionDecoder.__init__  s    	 0yyr"
 r*'0I'=4 (*2.  !1:g1EI91"5)'	
 '+84F+G&H#4Q7 ). 9 	1A}"08;N#&8"9A"==N#0014/+$(!//$-#1+(1H NN!!(+"0+	10 "6%a(%b)"
 	!!,-?-BM!RD!#;Ma;P]lrv!wD			"4Q"7qRST&+#r   zimager   r   ra   c                 j   |}| j                  |      }t        t        | j                  j	                                     j
                  }t        j                         r| j                  r| j                  | j                  ||      }|j                  |      }|'|%d|z
  |z  }| j                  | j                  ||      }| j                  D ]w  }	|`|^t        t        |j                                 }
t         j"                  j%                  ||j                  dd d      }||z  |
d|z
  z  z   }| j                  |	||      }y ||||z  t        t        |j                                 d|z
  z  z   }n| j                  ||      }|j                  |      }||d|z
  |z  }| j                  ||      }| j                  D ]m  }	|`|^t        t        |j                                 }
t         j"                  j%                  ||j                  dd d      }||z  |
d|z
  z  z   } |	||      }o |/|-||z  t        t        |j                                 d|z
  z  z   }|| j'                  |      }n| j'                  ||      }| j)                  |      }| j+                  |      }|S )z7The forward method of the `MaskConditionDecoder` class.Nr-   nearest)sizemode)rJ   nextiterrz   
parametersdtyper   rc   rV   rd   rP   tor   rl   r   r   rH   
functionalinterpolaterR   rT   rU   )rW   r   r   r   r   r   upscale_dtypemasked_imageim_xr   sample_mask_s               r   re   zMaskConditionDecoder.forward  s    f%T$..";";"=>?EE  "t'B'B66t~~v}]FYY}-F  T%5 !DE188**  !NN \$)9"3uV\\':#;<GMM55dbcARYb5cE#e^gU.CCF::8V][\  T%5$c%2E.F)G1t8)TT ^^FM:FYY}-F  T%5 !DE1--lDA !NN 9$)9"3uV\\':#;<GMM55dbcARYb5cE#e^gU.CCF!&-89  T%5$c%2E.F)G1t8)TT  ''/F''>Fv&v&r   )r   r   r   rg   r	   ri   rj   rv   )NNNr   ro   s   @r   r   r     s    0 *?.3 !! O,O, O, c3h	O,
 "#s(OO, O, O, O, O,h )-'+04?<<? %? u||$	?
  -? 
?r   r   c                   T    e Zd ZdZ	 	 	 	 ddedededededef fdZd	e	j                  d
e	j                  fdZd	e	j                  d
e	j                  fdZde	j                  d
ee	j                  e	j                  ef   fdZde	j                  deedf   d
e	j                  fdZ xZS )VectorQuantizerz
    Improved version over VectorQuantizer, can be used as a drop-in replacement. Mostly avoids costly matrix
    multiplications and allows for post-hoc remapping of indices.
    n_evq_embed_dimbetaunknown_indexsane_index_shapelegacyc           	      >   t         |           || _        || _        || _        || _        t        j                  | j                  | j                        | _        | j                  j                  j                  j                  d| j                  z  d| j                  z         || _        | j                  | j                  dt        j                  t!        j"                  | j                                     |  | j$                  j&                  d   | _        || _        | j*                  dk(  r%| j(                  | _        | j(                  dz   | _        t-        d| j                   d| j(                   d	| j*                   d
       || _        y || _        || _        y )Ng            ?usedr   extrar-   z
Remapping z indices to z indices. Using z for unknown indices.)rF   rG   r   r   r   r   rH   	Embedding	embeddingweightdatauniform_remapregister_bufferr   tensornploadr   r   re_embedr   printr   )	rW   r   r   r   r   r   r   r   r`   s	           r   rG   zVectorQuantizer.__init__?  sL    	(	dhh0A0AB""++D488OS488^L
::!  bggdjj6I)JK IIOOA.DM!.D!!W,%)]]" $ 1TXXJl4==/ B++,,AC !1  DM 0r   indsra   c                    |j                   }t        |      dkD  sJ |j                  |d   d      }| j                  j	                  |      }|d d d d d f   |d   k(  j                         }|j                  d      }|j                  d      dk  }| j                  dk(  rMt        j                  d| j                  ||   j                         j	                  |j                        ||<   n| j                  ||<   |j                  |      S )	Nr-   r   r;   )NN.r	   random)r   )device)r   rN   reshaper   r   longargmaxsumr   r   randintr   r   )rW   r   ishaper   matchnewunknowns          r   remap_to_usedzVectorQuantizer.remap_to_usedd  s    6{Q||F1Ir*yy||D!aDj!T/%::@@Bll2))A,") ==DMMG@R@RSVV^a^h^hViCL--CL{{6""r   c                    |j                   }t        |      dkD  sJ |j                  |d   d      }| j                  j	                  |      }| j
                  | j                  j                   d   kD  rd||| j                  j                   d   k\  <   t        j                  |d d d f   |j                   d   dgz  d d f   d|      }|j                  |      S )Nr-   r   r;   )r   rN   r   r   r   r   r   gather)rW   r   r   r   backs        r   unmap_to_allzVectorQuantizer.unmap_to_allr  s    6{Q||F1Ir*yy||D!==499??1--/0D++,||DqM$**Q-1#*=q*@A1dK||F##r   r   c                    |j                  dddd      j                         }|j                  d| j                        }t	        j
                  t	        j                  || j                  j                        d      }| j                  |      j                  |j                        }d }d }| j                  sa| j                  t	        j                  |j                         |z
  dz        z  t	        j                  ||j                         z
  dz        z   }n`t	        j                  |j                         |z
  dz        | j                  t	        j                  ||j                         z
  dz        z  z   }|||z
  j                         z   }|j                  dddd      j                         }| j                  B|j                  |j                  d   d      }| j!                  |      }|j                  dd      }| j"                  r:|j                  |j                  d   |j                  d   |j                  d         }|||||ffS )Nr   r	   r   r-   r;   dim)permute
contiguousviewr   r   argmincdistr   r   r   r   r   meandetachr   r   r   r   )rW   r   z_flattenedmin_encoding_indicesz_q
perplexitymin_encodingslosss           r   re   zVectorQuantizer.forward|  s   IIaAq!,,.ffR!2!23  %||EKKT^^EZEZ,[abcnn1277@
 {{99uzz3::<!+;*ABBUZZQTWXW_W_WaQafgPgEhhD::szz|a/A56UZZQTWXW_W_WaQafgPgEh9hhD q 0 0 22 kk!Q1%002::!#7#?#?
B#O #'#5#56J#K #7#?#?A#F   #7#?#?		!ciiXYl\_\e\efg\h#i D:}6JKKKr   indicesr   .c                    | j                   7|j                  |d   d      }| j                  |      }|j                  d      }| j                  |      }|3|j	                  |      }|j                  dddd      j                         }|S )Nr   r;   r   r-   r	   )r   r   r   r   r   r   r   )rW   r   r   r   s       r   get_codebook_entryz"VectorQuantizer.get_codebook_entry  s    ::!ooeAh3G''0Goob)G !NN73((5/C++aAq)446C
r   )Nr   FT)r   r   r   r   rk   floatrl   rm   rG   r   
LongTensorr   r   r   r   re   r   rn   ro   s   @r   r   r   6  s     %!&#1#1 #1 	#1 #1 #1 #1J#%"2"2 #u7G7G #$!1!1 $e6F6F $ L  L%ellE0Q*R  LD%*:*: 5c? W\WcWc r   r   c                      e Zd Zddej                  defdZddeej                     dej                  fdZ	ddd dej                  fd	Z
g d
fdej                  deedf   dej                  fdZdej                  fdZy)DiagonalGaussianDistributionr   deterministicc                    || _         t        j                  |dd      \  | _        | _        t        j
                  | j                  dd      | _        || _        t        j                  d| j                  z        | _        t        j                  | j                        | _	        | j                  rWt        j                  | j                  | j                   j                  | j                   j                        x| _	        | _        y y )Nr	   r-   r   g      >g      4@      ?)r   r   )r   r   chunkr   logvarclampr   expstdvar
zeros_liker   r   )rW   r   r   s      r   rG   z%DiagonalGaussianDistribution.__init__  s    $!&Z!B	4;kk$++ud;*99S4;;./99T[[)"'"2"2		$//"8"8@U@U# DHtx r   N	generatorra   c                     t        | j                  j                  || j                  j                  | j                  j
                        }| j                  | j                  |z  z   }|S )N)r
  r   r   )r   r   r   r   r   r   r  )rW   r
  r   r   s       r   r   z#DiagonalGaussianDistribution.sample  sR    IIOO??))//''	
 II6))r   otherc                    | j                   rt        j                  dg      S |Wdt        j                  t        j                  | j
                  d      | j                  z   dz
  | j                  z
  g d      z  S dt        j                  t        j                  | j
                  |j
                  z
  d      |j                  z  | j                  |j                  z  z   dz
  | j                  z
  |j                  z   g d      z  S )N        r  r	   r   r-   r	   r   r   )r   r   r   r   powr   r  r  )rW   r  s     r   klzDiagonalGaussianDistribution.kl  s    <<&&}UYYIIdii+dhh6<t{{J!  
 UYYIIdii%**4a8599Dhh*+ kk" ll	#
 "  r   r  r   dims.c                 B   | j                   rt        j                  dg      S t        j                  dt        j
                  z        }dt        j                  || j                  z   t        j                  || j                  z
  d      | j                  z  z   |      z  S )Nr  g       @r  r	   r   )r   r   r   r   logpir   r  r  r   r  )rW   r   r  logtwopis       r   nllz DiagonalGaussianDistribution.nll  s{    <<&&66#+&UYYt{{"UYYv		/A1%E%PP
 
 	
r   c                     | j                   S r   )r   rW   s    r   r   z!DiagonalGaussianDistribution.mode  s    yyr   )Fr   )r   r   r   r   r   rm   rG   r   	Generatorr   r  r   rk   r  r   r   r   r   r   r     s    
5<< 
 
	 9 	U\\ 	6 %,, & AJ 
%,, 
eCHo 
ell 
ell r   r   c                       e Zd Zdej                  fdZddeej                     dej                  fdZdej                  fdZ	y)	IdentityDistributionr   c                     || _         y r   r   )rW   r   s     r   rG   zIdentityDistribution.__init__  s	    $r   Nr
  ra   c                     | j                   S r   r  )rW   r
  s     r   r   zIdentityDistribution.sample      r   c                     | j                   S r   r  r  s    r   r   zIdentityDistribution.mode  r   r   r   )
r   r   r   r   r   rG   r   r  r   r   r   r   r   r  r    sB    %5<< % 9 U\\ ell r   r  c            
            e Zd ZdZdededeedf   deedf   def
 fdZd	ej                  d
ej                  fdZ
 xZS )EncoderTinya  
    The `EncoderTiny` layer is a simpler version of the `Encoder` layer.

    Args:
        in_channels (`int`):
            The number of input channels.
        out_channels (`int`):
            The number of output channels.
        num_blocks (`Tuple[int, ...]`):
            Each value of the tuple represents a Conv2d layer followed by `value` number of `AutoencoderTinyBlock`'s to
            use.
        block_out_channels (`Tuple[int, ...]`):
            The number of output channels for each block.
        act_fn (`str`):
            The activation function to use. See `~diffusers.models.activations.get_activation` for available options.
    r$   r%   
num_blocks.r'   r*   c                    t         |           g }t        |      D ]  \  }}||   }	|dk(  r)|j                  t	        j
                  ||	dd             n*|j                  t	        j
                  |	|	dddd             t        |      D ]  }
|j                  t        |	|	|                |j                  t	        j
                  |d   |dd             t	        j                  | | _	        d| _
        y )	Nr   r   r-   r/   r1   r	   F)r/   r1   r0   biasr;   )rF   rG   rM   rO   rH   rI   r   r   r   r   rV   )rW   r$   r%   r$  r'   r*   r   rZ   	num_blockrB   _r`   s              r   rG   zEncoderTiny.__init__  s     	%j1 	XLAy-a0LAvbii\qZ[\]II$$$% ! "	 9% X2<vVWX#	X( 	bii 22 6RS]^_`mmV,&+#r   r   ra   c                     t        j                         r*| j                  r| j                  | j                  |      }|S | j	                  |j                  d      j                  d            }|S )z.The forward method of the `EncoderTiny` class.r-   r	   )r   rc   rV   rd   r   adddivr   s     r   re   zEncoderTiny.forward)  sX      "t'B'B11$++qAA  AEE!HLLO,Ar   r   r   r   r   rk   r   rl   rG   r   r   re   rn   ro   s   @r   r#  r#    si    "",", ", #s(O	",
 "#s(O", ",H	 	%,, 	r   r#  c                        e Zd ZdZdededeedf   deedf   deded	ef fd
Zdej                  dej                  fdZ
 xZS )DecoderTinya  
    The `DecoderTiny` layer is a simpler version of the `Decoder` layer.

    Args:
        in_channels (`int`):
            The number of input channels.
        out_channels (`int`):
            The number of output channels.
        num_blocks (`Tuple[int, ...]`):
            Each value of the tuple represents a Conv2d layer followed by `value` number of `AutoencoderTinyBlock`'s to
            use.
        block_out_channels (`Tuple[int, ...]`):
            The number of output channels for each block.
        upsampling_scaling_factor (`int`):
            The scaling factor to use for upsampling.
        act_fn (`str`):
            The activation function to use. See `~diffusers.models.activations.get_activation` for available options.
    r$   r%   r$  .r'   upsampling_scaling_factorr*   upsample_fnc           
         t         |           t        j                  ||d   dd      t	        |      g}t        |      D ]  \  }	}
|	t        |      dz
  k(  }||	   }t        |
      D ]  }|j                  t        |||               |s&|j                  t        j                  ||             |s|n|}|j                  t        j                  ||dd|              t        j                  | | _        d| _        y )Nr   r   r-   r&  )scale_factorr   )r/   r1   r'  F)rF   rG   rH   rI   r
   rM   rN   r   rO   r   Upsampler   r   rV   )rW   r$   r%   r$  r'   r0  r*   r1  r   rZ   r(  r]   rB   r)  conv_out_channelr`   s                  r   rG   zDecoderTiny.__init__I  s    	 IIk#5a#8aQRS6"

 &j1 	LAy3z?Q#67N-a0L9% X2<vVWX "bkk7PWbcd3A||MM		 $ !'	* mmV,&+#r   r   ra   c                    t        j                  |dz        dz  }t        j                         r)| j                  r| j	                  | j
                  |      }n| j                  |      }|j                  d      j                  d      S )z.The forward method of the `DecoderTiny` class.r   r	   r-   )r   tanhrc   rV   rd   r   mulsubr   s     r   re   zDecoderTiny.forwardr  sj     JJq1u!  "t'B'B11$++qAAAA uuQx||Ar   r-  ro   s   @r   r/  r/  5  s}    &',', ', #s(O	',
 "#s(O', $'', ', ',R %,, r   r/  c                   $    e Zd Zd Zd Zd Zd Zy)AutoencoderMixinc                 p    t        | d      s#t        d| j                  j                   d      d| _        y)a  
        Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
        compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
        processing larger images.
        
use_tilingz*Tiling doesn't seem to be implemented for .TN)hasattrNotImplementedErrorr`   r   r=  r  s    r   enable_tilingzAutoencoderMixin.enable_tiling  s9     t\*%(RSWSaSaSjSjRkkl&mnnr   c                     d| _         y)z
        Disable tiled VAE decoding. If `enable_tiling` was previously enabled, this method will go back to computing
        decoding in one step.
        FN)r=  r  s    r   disable_tilingzAutoencoderMixin.disable_tiling  s    
  r   c                 p    t        | d      s#t        d| j                  j                   d      d| _        y)z
        Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
        compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
        use_slicingz+Slicing doesn't seem to be implemented for r>  TN)r?  r@  r`   r   rE  r  s    r   enable_slicingzAutoencoderMixin.enable_slicing  s:    
 t]+%(STXTbTbTkTkSllm&noor   c                     d| _         y)z
        Disable sliced VAE decoding. If `enable_slicing` was previously enabled, this method will go back to computing
        decoding in one step.
        FN)rE  r  s    r   disable_slicingz AutoencoderMixin.disable_slicing  s    
 !r   N)r   r   r   rA  rC  rF  rH  r   r   r   r;  r;    s      !r   r;  )&dataclassesr   typingr   r   numpyr   r   torch.nnrH   utilsr   utils.torch_utilsr   activationsr
   attention_processorr   unets.unet_2d_blocksr   r   r   r   r   r   Moduler#   rq   r   r   r   r   objectr   r  r#  r/  r;  r   r   r   <module>rT     s   " "     - ( -  	J 	 	 
4J 
4 
4vbii vrCbii CLryy 82299 2jg299 gTwbii wt56 5p6 ?")) ?DH")) HV !  !r   