in demucs/demucs.py [0:0]
def __init__(self, channels: int, compress: float = 4, depth: int = 2, init: float = 1e-4,
norm=True, attn=False, heads=4, ndecay=4, lstm=False, gelu=True,
kernel=3, dilate=True):
"""
Args:
channels: input/output channels for residual branch.
compress: amount of channel compression inside the branch.
depth: number of layers in the residual branch. Each layer has its own
projection, and potentially LSTM and attention.
init: initial scale for LayerNorm.
norm: use GroupNorm.
attn: use LocalAttention.
heads: number of heads for the LocalAttention.
ndecay: number of decay controls in the LocalAttention.
lstm: use LSTM.
gelu: Use GELU activation.
kernel: kernel size for the (dilated) convolutions.
dilate: if true, use dilation, increasing with the depth.
"""
super().__init__()
assert kernel % 2 == 1
self.channels = channels
self.compress = compress
self.depth = abs(depth)
dilate = depth > 0
norm_fn: tp.Callable[[int], nn.Module]
norm_fn = lambda d: nn.Identity() # noqa
if norm:
norm_fn = lambda d: nn.GroupNorm(1, d) # noqa
hidden = int(channels / compress)
act: tp.Type[nn.Module]
if gelu:
act = nn.GELU
else:
act = nn.ReLU
self.layers = nn.ModuleList([])
for d in range(self.depth):
dilation = 2 ** d if dilate else 1
padding = dilation * (kernel // 2)
mods = [
nn.Conv1d(channels, hidden, kernel, dilation=dilation, padding=padding),
norm_fn(hidden), act(),
nn.Conv1d(hidden, 2 * channels, 1),
norm_fn(2 * channels), nn.GLU(1),
LayerScale(channels, init),
]
if attn:
mods.insert(3, LocalState(hidden, heads=heads, ndecay=ndecay))
if lstm:
mods.insert(3, BLSTM(hidden, layers=2, max_steps=200, skip=True))
layer = nn.Sequential(*mods)
self.layers.append(layer)