in models/context_model.py [0:0]
def __init__(self, ch_in: int, ch_out: int, heads: int, audio_dim: int, kernel_size: int = 1, dilation: int = 1):
    """
    :param ch_in: number of input channels per head
    :param ch_out: number of output channels per head
    :param heads: number of heads
    :param audio_dim: size of the latent audio embedding
    :param kernel_size: kernel size of the historic context convolution
    :param dilation: dilation used in the historic context convolution
    """
    super().__init__()
    self.ch_in = ch_in
    self.ch_out = ch_out
    self.heads = heads
    self.audio_dim = audio_dim
    self.kernel_size = kernel_size
    self.dilation = dilation
    # 1x1 convolutions: one over the audio embedding (unmasked) and one over the
    # concatenated per-head input channels (masked via the buffer below).
    self.unmasked_linear = th.nn.Conv1d(audio_dim, ch_out * heads, kernel_size=1)
    self.masked_linear = th.nn.Conv1d(ch_in * heads, ch_out * heads, kernel_size=1)
    # Autoregressive mask across heads: the output block of head i is cut off
    # from the input channels of head i and all later heads, so head i can only
    # read from heads 0..i-1 (head 0 sees no masked input at all).
    mask = th.ones(ch_out * heads, ch_in * heads, 1)
    for i in range(heads):
        mask[ch_out * i:ch_out * (i + 1), ch_in * i:, :] = 0
    self.register_buffer("mask", mask)
    # Optional convolution over past context; passing kernel_size=0 disables it.
    if kernel_size > 0:
        self.historic = th.nn.Conv1d(ch_in * heads, ch_out * heads, kernel_size=kernel_size, dilation=dilation)
    self.reset()
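For illustration, the standalone sketch below mirrors the mask construction above with toy sizes (heads=2, ch_in=4, ch_out=3 are made-up numbers) and prints the per-head blocking pattern. Multiplying the mask into masked_linear's weights inside a 1x1 convolution is an assumption about how a forward pass would use it; the forward method is not part of this excerpt.

import torch as th

# Toy sizes for illustration only (not taken from the model config).
ch_in, ch_out, heads = 4, 3, 2

# Same construction as in __init__: start with ones, then zero out, for each
# head i, the input channels belonging to head i and all later heads.
mask = th.ones(ch_out * heads, ch_in * heads, 1)
for i in range(heads):
    mask[ch_out * i:ch_out * (i + 1), ch_in * i:, :] = 0

print(mask[0:3, :, 0])  # head 0: all zeros, sees no masked input
print(mask[3:6, :, 0])  # head 1: ones over head 0's channels (cols 0-3), zeros after

# Assumed usage: apply the mask to the 1x1 conv weights so the masked branch
# respects the per-head ordering (the real forward pass is not shown here).
masked_linear = th.nn.Conv1d(ch_in * heads, ch_out * heads, kernel_size=1)
x = th.randn(1, ch_in * heads, 100)  # (batch, ch_in * heads, time)
y = th.nn.functional.conv1d(x, masked_linear.weight * mask, masked_linear.bias)
print(y.shape)  # torch.Size([1, 6, 100])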