sat/sgm/modules/autoencoding/losses/video_loss.py [349:396]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                    in_chan,
                    out_chan,
                    antialiased_downsample=antialiased_downsample,
                )

                blocks.append(block)

                frame_resolution //= 2
            else:
                block = DiscriminatorBlock(
                    in_chan,
                    out_chan,
                    downsample=is_not_last,
                    antialiased_downsample=antialiased_downsample,
                )
                attn_block = nn.Sequential(
                    Residual(
                        LinearSpaceAttention(dim=out_chan, heads=linear_attn_heads, dim_head=linear_attn_dim_head)
                    ),
                    Residual(FeedForward(dim=out_chan, mult=ff_mult, images=True)),
                )

                blocks.append(nn.ModuleList([block, attn_block]))

            image_resolution //= 2

        self.blocks = nn.ModuleList(blocks)

        dim_last = layer_dims[-1]

        downsample_factor = 2**num_layers
        last_fmap_size = tuple(map(lambda n: n // downsample_factor, image_size))

        latent_dim = last_fmap_size[0] * last_fmap_size[1] * dim_last

        self.to_logits = nn.Sequential(
            nn.Conv2d(dim_last, dim_last, 3, padding=1),
            leaky_relu(),
            Rearrange("b ... -> b (...)"),
            nn.Linear(latent_dim, 1),
            Rearrange("b 1 -> b"),
        )

    def forward(self, x):
        for i, layer in enumerate(self.blocks):
            if i < self.temporal_num_layers:
                x = layer(x)
                if i == self.temporal_num_layers - 1:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -


sat/sgm/modules/autoencoding/losses/video_loss.py [444:491]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                    in_chan,
                    out_chan,
                    antialiased_downsample=antialiased_downsample,
                )

                blocks.append(block)

                frame_resolution //= 2
            else:
                block = DiscriminatorBlock(
                    in_chan,
                    out_chan,
                    downsample=is_not_last,
                    antialiased_downsample=antialiased_downsample,
                )
                attn_block = nn.Sequential(
                    Residual(
                        LinearSpaceAttention(dim=out_chan, heads=linear_attn_heads, dim_head=linear_attn_dim_head)
                    ),
                    Residual(FeedForward(dim=out_chan, mult=ff_mult, images=True)),
                )

                blocks.append(nn.ModuleList([block, attn_block]))

            image_resolution //= 2

        self.blocks = nn.ModuleList(blocks)

        dim_last = layer_dims[-1]

        downsample_factor = 2**num_layers
        last_fmap_size = tuple(map(lambda n: n // downsample_factor, image_size))

        latent_dim = last_fmap_size[0] * last_fmap_size[1] * dim_last

        self.to_logits = nn.Sequential(
            nn.Conv2d(dim_last, dim_last, 3, padding=1),
            leaky_relu(),
            Rearrange("b ... -> b (...)"),
            nn.Linear(latent_dim, 1),
            Rearrange("b 1 -> b"),
        )

    def forward(self, x):
        for i, layer in enumerate(self.blocks):
            if i < self.temporal_num_layers:
                x = layer(x)
                if i == self.temporal_num_layers - 1:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -