in evaluate.py [0:0]
def chunked_forwarding(net, mono, view):
    '''
    binauralize the mono input given the view, forwarding the signal through the network in chunks

    Every chunk after the first is prefixed with up to rec_field samples of left context so the
    network sees enough history; the output produced for that context is discarded before the
    chunks are concatenated. The network is put into eval mode and moved to the GPU when CUDA is
    available, otherwise everything runs on the CPU.

    :param net: binauralization network; must provide receptive_field() and, when called as
                net(mono, view), return a dict whose "output" entry holds the binaural signal
    :param mono: 1 x T tensor containing the mono audio signal
    :param view: 7 x K tensor containing the view as 3D positions and quaternions for orientation (K = T / 400)
    :return: 2 x T tensor (on the CPU, clamped to [-1, 1]) containing binauralized audio signal;
             an empty input (T = 0) yields an empty 2 x 0 tensor
    '''
    # fall back to the CPU on machines without CUDA instead of failing outright
    device = th.device("cuda" if th.cuda.is_available() else "cpu")
    net.eval().to(device)
    mono, view = mono.to(device), view.to(device)

    frame_len = 400  # number of audio samples per view frame (K = T / 400)
    chunk_size = 480000  # forward in chunks of 10s
    rec_field = net.receptive_field() + 1000  # add 1000 samples as "safe bet" since warping has undefined rec. field
    rec_field -= rec_field % frame_len  # make sure rec_field is a multiple of 400 to match audio and view frequencies

    n_samples = mono.shape[-1]
    if n_samples == 0:
        # nothing to forward; th.cat would raise on an empty list of chunks
        return mono.new_zeros((2, 0)).cpu()

    outputs = []
    with th.no_grad():
        for start in range(0, n_samples, chunk_size):
            # prepend as much left context as is available, at most rec_field samples
            ctx_start = max(0, start - rec_field)
            end = start + chunk_size
            mono_chunk = mono[:, ctx_start:end].unsqueeze(0)
            view_chunk = view[:, ctx_start // frame_len:end // frame_len].unsqueeze(0)
            binaural = net(mono_chunk, view_chunk)["output"].squeeze(0)
            if start > 0:
                # keep only the samples that belong to this chunk; the context actually
                # prepended can be shorter than rec_field when rec_field > start
                new_samples = mono_chunk.shape[-1] - (start - ctx_start)
                binaural = binaural[:, -new_samples:]
            outputs.append(binaural)

    binaural = th.cat(outputs, dim=-1)
    return th.clamp(binaural, min=-1, max=1).cpu()