in src/models.py [0:0]
def forward(self, mono, view):
    '''
    Warp the mono input, encode it, and decode one prediction per network depth.
    :param mono: the input signal as a B x 1 x T tensor
    :param view: the receiver/transmitter position as a B x 7 x T tensor
    :return: a dict with two entries
             "output": the binaural output produced by the network, decoded from the
                       average of all skip connections
             "intermediate": a list holding the predictions decoded from progressively fewer
                             skip connections (deepest first; two-channel B x 2 x T tensors),
                             followed by the warped input signal as its last element
    '''
    warped = self.warper(mono, view)
    features = self.input(warped)
    _, skips = self.hyperconv_wavenet(features, view)

    # Head number `depth` decodes the average of the first `depth` skip tensors.
    # Walking from the deepest head down puts the full-depth prediction first.
    predictions = [
        self.output_net[depth - 1](th.mean(th.stack(skips[:depth], dim=0), dim=0))
        for depth in reversed(range(1, len(skips) + 1))
    ]
    # The warped signal is returned alongside the learned predictions.
    predictions.append(warped)

    final, *earlier = predictions
    return {"output": final, "intermediate": earlier}