in models/context_model.py [0:0]
def __init__(self, classes: int = 128, heads: int = 64, audio_dim: int = 128, model_name: str = "context_model"):
    """
    Build the embedding layer, the stack of dilated context layers, and the output (logits) layer.

    :param classes: number of classes for the categorical latent embedding
    :param heads: number of heads for the categorical latent embedding
    :param audio_dim: size of the latent audio embedding
    :param model_name: name of the model, used to load and save the model
    """
    super().__init__(model_name)

    # Keep the constructor arguments available on the instance.
    self.classes, self.heads, self.audio_dim = classes, heads, audio_dim

    # Channel width used between the embedding and the logits layers.
    width = 64
    # Keyword arguments shared by every MaskedContextConvolution created below.
    shared = dict(heads=heads, audio_dim=audio_dim)

    # Input layer: classes -> width, constructed with kernel_size=0.
    self.embedding = MaskedContextConvolution(ch_in=classes, ch_out=width, kernel_size=0, **shared)

    # Four width -> width layers with kernel_size=2; the dilation doubles from one layer to the next.
    self.context_layers = th.nn.ModuleList([
        MaskedContextConvolution(ch_in=width, ch_out=width, kernel_size=2, dilation=rate, **shared)
        for rate in (1, 2, 4, 8)
    ])

    # Output layer: width -> classes, constructed with kernel_size=0.
    self.logits = MaskedContextConvolution(ch_in=width, ch_out=classes, kernel_size=0, **shared)