sat/sgm/modules/autoencoding/losses/video_loss.py [312:347]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    @beartype
    def __init__(
        self,
        *,
        dim,
        image_size,
        frame_num,
        channels=3,
        max_dim=512,
        linear_attn_dim_head=8,
        linear_attn_heads=16,
        ff_mult=4,
        antialiased_downsample=False,
    ):
        super().__init__()
        image_size = pair(image_size)
        min_image_resolution = min(image_size)

        num_layers = int(log2(min_image_resolution) - 2)
        temporal_num_layers = int(log2(frame_num))
        self.temporal_num_layers = temporal_num_layers

        layer_dims = [channels] + [(dim * 4) * (2**i) for i in range(num_layers + 1)]
        layer_dims = [min(layer_dim, max_dim) for layer_dim in layer_dims]
        layer_dims_in_out = tuple(zip(layer_dims[:-1], layer_dims[1:]))

        blocks = []

        image_resolution = min_image_resolution
        frame_resolution = frame_num

        for ind, (in_chan, out_chan) in enumerate(layer_dims_in_out):
            num_layer = ind + 1
            is_not_last = ind != (len(layer_dims_in_out) - 1)

            if ind < temporal_num_layers:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



sat/sgm/modules/autoencoding/losses/video_loss.py [407:442]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    @beartype
    def __init__(
        self,
        *,
        dim,
        image_size,
        frame_num,
        channels=3,
        max_dim=512,
        linear_attn_dim_head=8,
        linear_attn_heads=16,
        ff_mult=4,
        antialiased_downsample=False,
    ):
        super().__init__()
        image_size = pair(image_size)
        min_image_resolution = min(image_size)

        num_layers = int(log2(min_image_resolution) - 2)
        temporal_num_layers = int(log2(frame_num))
        self.temporal_num_layers = temporal_num_layers

        layer_dims = [channels] + [(dim * 4) * (2**i) for i in range(num_layers + 1)]
        layer_dims = [min(layer_dim, max_dim) for layer_dim in layer_dims]
        layer_dims_in_out = tuple(zip(layer_dims[:-1], layer_dims[1:]))

        blocks = []

        image_resolution = min_image_resolution
        frame_resolution = frame_num

        for ind, (in_chan, out_chan) in enumerate(layer_dims_in_out):
            num_layer = ind + 1
            is_not_last = ind != (len(layer_dims_in_out) - 1)

            if ind < temporal_num_layers:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



