def generate()

in models/diffwave.py
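
generate() runs DiffWave-style fast sampling: starting from Gaussian noise, it iterates the reverse diffusion process conditioned on a mel spectrogram, using a short inference noise schedule whose steps are first aligned to (fractional) timesteps of the longer training schedule.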


    def generate(self, spectrograms: Tensor, training: bool = False) -> Tensor:
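        """Synthesize a waveform from mel spectrograms by reverse diffusion,
        using a short inference noise schedule aligned to the training one."""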
        self.model.eval()

        device = spectrograms.device

        # When called during training (e.g. to log periodic samples), cap the
        # conditioner at the first 200 mel frames to keep generation cheap.
        if training:
            spectrograms = spectrograms[:, :, :200]

        with torch.no_grad():
            # The long noise schedule used at training time and the short one
            # used for fast inference.
            training_noise_schedule = np.array(self.config.model.noise_schedule)
            inference_noise_schedule = np.array(
                self.config.model.inference_noise_schedule
            )

            # alpha-bar: cumulative product of (1 - beta) under each schedule.
            talpha = 1 - training_noise_schedule
            talpha_cum = np.cumprod(talpha)

            beta = inference_noise_schedule
            alpha = 1 - beta
            alpha_cum = np.cumprod(alpha)

            # Map each inference step s to a fractional training timestep:
            # find the adjacent training steps whose alpha-bar values bracket
            # alpha_cum[s] and interpolate linearly in sqrt(alpha-bar).
            T = []
            for s in range(len(inference_noise_schedule)):
                for t in range(len(training_noise_schedule) - 1):
                    if talpha_cum[t + 1] <= alpha_cum[s] <= talpha_cum[t]:
                        twiddle = (talpha_cum[t] ** 0.5 - alpha_cum[s] ** 0.5) / (
                            talpha_cum[t] ** 0.5 - talpha_cum[t + 1] ** 0.5
                        )
                        T.append(t + twiddle)
                        break
            T = np.array(T, dtype=np.float32)

            # Expand rank 2 tensors by adding a batch dimension.
            if len(spectrograms.shape) == 2:
                spectrograms = spectrograms.unsqueeze(0)
            spectrograms = spectrograms.to(device)

            # Start from Gaussian noise, MEL_HOP_SAMPLES waveform samples per
            # mel frame of the conditioner.
            audio = torch.randn(
                spectrograms.shape[0],
                MEL_HOP_SAMPLES * spectrograms.shape[-1],
                device=device,
            )

            # Reverse diffusion: step from the noisiest inference step down to
            # the cleanest, subtracting the model's noise prediction each time.
            for n in range(len(alpha) - 1, -1, -1):
                c1 = 1 / alpha[n] ** 0.5
                c2 = beta[n] / (1 - alpha_cum[n]) ** 0.5
                # The model is conditioned on the (fractional) training
                # timestep T[n] that this inference step corresponds to.
                audio = c1 * (
                    audio
                    - c2
                    * self.model(
                        audio, spectrograms, torch.tensor([T[n]], device=audio.device)
                    ).squeeze(1)
                )
                if n > 0:
                    # Add posterior noise on every step except the last.
                    noise = torch.randn_like(audio)
                    sigma = (
                        (1.0 - alpha_cum[n - 1]) / (1.0 - alpha_cum[n]) * beta[n]
                    ) ** 0.5
                    audio += sigma * noise
                audio = torch.clamp(audio, -1.0, 1.0)

        # Restore training mode and return the waveform as a flat 1-D tensor.
        self.model.train()

        return audio.flatten()
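
The trickiest part above is the schedule alignment. Below is a minimal,
self-contained sketch of the same computation with illustrative values (the
50-step linear training schedule and 6-step inference schedule mirror common
DiffWave defaults but are assumptions, not necessarily this module's config):

    import numpy as np

    # Assumed schedules, for illustration only.
    training_noise_schedule = np.linspace(1e-4, 0.05, 50)
    inference_noise_schedule = np.array([1e-4, 1e-3, 1e-2, 0.05, 0.2, 0.5])

    talpha_cum = np.cumprod(1 - training_noise_schedule)   # training alpha-bar
    alpha_cum = np.cumprod(1 - inference_noise_schedule)   # inference alpha-bar

    # Same alignment as generate(): bracket each inference alpha-bar between
    # adjacent training alpha-bars and interpolate in sqrt(alpha-bar).
    T = []
    for s in range(len(inference_noise_schedule)):
        for t in range(len(training_noise_schedule) - 1):
            if talpha_cum[t + 1] <= alpha_cum[s] <= talpha_cum[t]:
                twiddle = (talpha_cum[t] ** 0.5 - alpha_cum[s] ** 0.5) / (
                    talpha_cum[t] ** 0.5 - talpha_cum[t + 1] ** 0.5
                )
                T.append(t + twiddle)
                break

    print(np.array(T))  # six fractional training timesteps, one per inference step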
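
For completeness, a hypothetical calling sketch; the object name, the mel
shape (80 bins x 200 frames), and the construction of the synthesizer are all
assumptions, since the excerpt shows only the method:

    import torch

    synth = ...  # an instance of the class that owns generate(); not shown here

    # A single (n_mels, frames) spectrogram; generate() adds the batch
    # dimension itself and also accepts (batch, n_mels, frames).
    mel = torch.randn(80, 200)

    waveform = synth.generate(mel)  # flat 1-D tensor of audio samples,
                                    # MEL_HOP_SAMPLES samples per mel frame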