in muse/modeling_movq.py [0:0]
def __init__(self, config):
    """Build the MoVQ decoder stack.

    Maps quantized latents `z` through an input convolution, a middle
    (bottleneck) block, a chain of upsampling blocks, and an output head
    that projects back to image channels. All sub-blocks are conditioned
    on the quantized embedding (`zq_ch`).
    """
    super().__init__()
    self.config = config
    cfg = config

    # Channel width and spatial size at the lowest (most downsampled) level.
    num_res = cfg.num_resolutions
    lowest_channels = cfg.hidden_channels * cfg.channel_mult[num_res - 1]
    res = cfg.resolution // 2 ** (num_res - 1)
    self.z_shape = (1, cfg.z_channels, res, res)

    # Project latents up to the decoder's widest channel count.
    self.conv_in = nn.Conv2d(cfg.z_channels, lowest_channels, kernel_size=3, stride=1, padding=1)

    # Bottleneck block at the lowest resolution.
    self.mid = MidBlock(config, lowest_channels, zq_ch=cfg.quantized_embed_dim, dropout=cfg.dropout)

    # Build upsampling blocks from lowest to highest resolution, then reverse
    # the list so indexing order in self.up matches the level order.
    blocks = []
    for level in reversed(range(num_res)):
        blocks.append(UpsamplingBlock(cfg, res, block_idx=level, zq_ch=cfg.quantized_embed_dim))
        if level != 0:
            res *= 2
    blocks.reverse()
    self.up = nn.ModuleList(blocks)

    # Output head at the highest resolution.
    highest_channels = cfg.hidden_channels * cfg.channel_mult[0]
    self.norm_out = Normalize(highest_channels, cfg.quantized_embed_dim, False)
    self.conv_out = nn.Conv2d(highest_channels, cfg.num_channels, kernel_size=3, stride=1, padding=1)