in denoiser/demucs.py [0:0]
def feed(self, wav):
"""
Apply the model to mix using true real time evaluation.
Normalization is done online as is the resampling.
"""
begin = time.time()
demucs = self.demucs
resample_buffer = self.resample_buffer
stride = self.stride
resample = demucs.resample
if wav.dim() != 2:
raise ValueError("input wav should be two dimensional.")
chin, _ = wav.shape
if chin != demucs.chin:
raise ValueError(f"Expected {demucs.chin} channels, got {chin}")
self.pending = th.cat([self.pending, wav], dim=1)
outs = []
while self.pending.shape[1] >= self.total_length:
self.frames += 1
frame = self.pending[:, :self.total_length]
dry_signal = frame[:, :stride]
if demucs.normalize:
mono = frame.mean(0)
variance = (mono**2).mean()
self.variance = variance / self.frames + (1 - 1 / self.frames) * self.variance
frame = frame / (demucs.floor + math.sqrt(self.variance))
padded_frame = th.cat([self.resample_in, frame], dim=-1)
self.resample_in[:] = frame[:, stride - resample_buffer:stride]
frame = padded_frame
if resample == 4:
frame = upsample2(upsample2(frame))
elif resample == 2:
frame = upsample2(frame)
frame = frame[:, resample * resample_buffer:] # remove pre sampling buffer
frame = frame[:, :resample * self.frame_length] # remove extra samples after window
out, extra = self._separate_frame(frame)
padded_out = th.cat([self.resample_out, out, extra], 1)
self.resample_out[:] = out[:, -resample_buffer:]
if resample == 4:
out = downsample2(downsample2(padded_out))
elif resample == 2:
out = downsample2(padded_out)
else:
out = padded_out
out = out[:, resample_buffer // resample:]
out = out[:, :stride]
if demucs.normalize:
out *= math.sqrt(self.variance)
out = self.dry * dry_signal + (1 - self.dry) * out
outs.append(out)
self.pending = self.pending[:, stride:]
self.total_time += time.time() - begin
if outs:
out = th.cat(outs, 1)
else:
out = th.zeros(chin, 0, device=wav.device)
return out