in models/diffwave.py [0:0]
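# Assumed module-level context for this excerpt (not shown in the source):
#   import numpy as np
#   import torch
#   from torch import Tensor
# MEL_HOP_SAMPLES is taken to be the mel-spectrogram hop length in samples,
# defined elsewhere in models/diffwave.py.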
def generate(self, spectrograms: Tensor, training: bool = False) -> Tensor:
    """Run the reverse diffusion process to synthesize audio from mel spectrograms."""
    self.model.eval()
    device = spectrograms.device
    if training:
        # When called during training, cap generation at the first 200 mel frames.
        spectrograms = spectrograms[:, :, :200]
    with torch.no_grad():
        # Two schedules: the (long) training noise schedule and the (short)
        # inference schedule used for fast sampling; "t"-prefixed names refer
        # to training-schedule quantities.
        training_noise_schedule = np.array(self.config.model.noise_schedule)
        inference_noise_schedule = np.array(
            self.config.model.inference_noise_schedule
        )

        talpha = 1 - training_noise_schedule
        talpha_cum = np.cumprod(talpha)

        beta = inference_noise_schedule
        alpha = 1 - beta
        alpha_cum = np.cumprod(alpha)
        # Align the two schedules: for each inference noise level alpha_cum[s],
        # find the pair of adjacent training steps it falls between and
        # interpolate (in sqrt space) to a fractional timestep that the model's
        # diffusion-step embedding can consume.
        T = []
        for s in range(len(inference_noise_schedule)):
            for t in range(len(training_noise_schedule) - 1):
                if talpha_cum[t + 1] <= alpha_cum[s] <= talpha_cum[t]:
                    twiddle = (talpha_cum[t] ** 0.5 - alpha_cum[s] ** 0.5) / (
                        talpha_cum[t] ** 0.5 - talpha_cum[t + 1] ** 0.5
                    )
                    T.append(t + twiddle)
                    break
        T = np.array(T, dtype=np.float32)
        # Expand rank-2 tensors by adding a batch dimension.
        if len(spectrograms.shape) == 2:
            spectrograms = spectrograms.unsqueeze(0)
        spectrograms = spectrograms.to(device)

        # Start from pure Gaussian noise; each mel frame corresponds to
        # MEL_HOP_SAMPLES output samples.
        audio = torch.randn(
            spectrograms.shape[0],
            MEL_HOP_SAMPLES * spectrograms.shape[-1],
            device=device,
        )
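        # The loop below is the standard DDPM ancestral-sampling update, run on
        # the shortened inference schedule:
        #   x_{n-1} = (x_n - beta_n / sqrt(1 - alpha_cum_n) * eps_theta(x_n)) / sqrt(alpha_n)
        #             + sigma_n * z,   z ~ N(0, I),
        # where eps_theta is the model's noise prediction conditioned on the
        # spectrograms and the fractional timestep T[n].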
        for n in range(len(alpha) - 1, -1, -1):
            c1 = 1 / alpha[n] ** 0.5
            c2 = beta[n] / (1 - alpha_cum[n]) ** 0.5
            # Subtract the model's noise prediction to obtain the posterior mean.
            audio = c1 * (
                audio
                - c2
                * self.model(
                    audio, spectrograms, torch.tensor([T[n]], device=audio.device)
                ).squeeze(1)
            )
            if n > 0:
                # On all but the final step, re-inject noise scaled by the
                # posterior standard deviation.
                noise = torch.randn_like(audio)
                sigma = (
                    (1.0 - alpha_cum[n - 1]) / (1.0 - alpha_cum[n]) * beta[n]
                ) ** 0.5
                audio += sigma * noise
            # Keep samples in the valid audio range.
            audio = torch.clamp(audio, -1.0, 1.0)
    self.model.train()
    return audio.flatten()
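
# A minimal standalone sketch of the timestep-alignment step above, assuming
# beta-style noise schedules given as plain arrays. `align_timesteps` is a
# hypothetical helper, not part of this repo; it reproduces the computation of
# T from generate() so the schedule mapping can be inspected in isolation.
import numpy as np

def align_timesteps(training_betas, inference_betas):
    """Map inference noise levels to fractional indices into the training schedule."""
    talpha_cum = np.cumprod(1 - np.asarray(training_betas))
    alpha_cum = np.cumprod(1 - np.asarray(inference_betas))
    T = []
    for s in range(len(alpha_cum)):
        for t in range(len(talpha_cum) - 1):
            if talpha_cum[t + 1] <= alpha_cum[s] <= talpha_cum[t]:
                # Linear interpolation in sqrt(cumulative-alpha) space.
                twiddle = (talpha_cum[t] ** 0.5 - alpha_cum[s] ** 0.5) / (
                    talpha_cum[t] ** 0.5 - talpha_cum[t + 1] ** 0.5
                )
                T.append(t + twiddle)
                break
    return np.array(T, dtype=np.float32)

# Example (assumed schedules, for illustration only): a 50-step linear training
# schedule against a 6-step inference schedule of the kind commonly used with
# DiffWave fast sampling.
# align_timesteps(np.linspace(1e-4, 0.05, 50),
#                 [1e-4, 1e-3, 1e-2, 5e-2, 0.2, 0.5])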