in muse/modeling_taming_vqgan.py
import torch.nn as nn

# MidBlock and UpsamplingBlock are defined earlier in this module.


class Decoder(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.config = config

        # compute block_in and curr_res at the lowest resolution
        block_in = self.config.hidden_channels * self.config.channel_mult[self.config.num_resolutions - 1]
        curr_res = self.config.resolution // 2 ** (self.config.num_resolutions - 1)
        self.z_shape = (1, self.config.z_channels, curr_res, curr_res)

        # project z to block_in channels
        self.conv_in = nn.Conv2d(
            self.config.z_channels,
            block_in,
            kernel_size=3,
            stride=1,
            padding=1,
        )

        # middle (ResNet blocks, with attention unless disabled)
        self.mid = MidBlock(config, block_in, self.config.no_attn_mid_block, self.config.dropout)

        # upsampling: build from the lowest resolution up, doubling curr_res per level
        upsample_blocks = []
        for i_level in reversed(range(self.config.num_resolutions)):
            upsample_blocks.append(UpsamplingBlock(self.config, curr_res, block_idx=i_level))
            if i_level != 0:
                curr_res = curr_res * 2
        self.up = nn.ModuleList(list(reversed(upsample_blocks)))  # reverse to get consistent order

        # end: normalize, then project to the output image channels
        block_out = self.config.hidden_channels * self.config.channel_mult[0]
        self.norm_out = nn.GroupNorm(num_groups=32, num_channels=block_out, eps=1e-6, affine=True)
        self.conv_out = nn.Conv2d(
            block_out,
            self.config.num_channels,
            kernel_size=3,
            stride=1,
            padding=1,
        )
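
Below is a minimal sketch, not part of the repo, that walks through the shape arithmetic in __init__ above; the config field names match the attributes the constructor reads, but the concrete values are illustrative assumptions rather than muse defaults.

from types import SimpleNamespace

# Illustrative config: field names mirror those used in Decoder.__init__;
# the values are assumptions chosen for this example only.
config = SimpleNamespace(
    hidden_channels=128,
    channel_mult=(1, 1, 2, 2, 4),
    num_resolutions=5,
    resolution=256,
    z_channels=256,
)

# Deepest feature width: hidden_channels times the last channel multiplier.
block_in = config.hidden_channels * config.channel_mult[config.num_resolutions - 1]  # 128 * 4 = 512

# Spatial size of the latent grid: the input resolution halved once per
# level below the first, i.e. 256 // 2**4 = 16.
curr_res = config.resolution // 2 ** (config.num_resolutions - 1)  # 16

z_shape = (1, config.z_channels, curr_res, curr_res)
print(block_in, z_shape)  # 512 (1, 256, 16, 16)

The upsampling loop in __init__ then doubles curr_res at every level except the last one visited (i_level == 0), so a 16x16 latent grid is carried back up to the full 256x256 image resolution before conv_out produces num_channels output planes.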