in docker_images/speechbrain/app/pipelines/text_to_speech.py [0:0]
def __call__(self, inputs: str) -> Tuple[np.array, int]:
    """
    Synthesize speech for ``inputs`` with the configured acoustic model and vocoder.

    Args:
        inputs (:obj:`str`):
            The text to generate audio from. If nothing is left after removing
            NUL characters and surrounding whitespace, the placeholder text
            ``"Empty query"`` is synthesized instead.
    Return:
        A :obj:`np.array` and a :obj:`int`: The raw waveform as a numpy array, and the sampling rate as an int.
    Raises:
        :obj:`ValueError`: If ``self.type`` is neither ``"tacotron2"`` nor ``"fastspeech2"``.
    """
    # Blank input (only NULs / whitespace) is replaced by a fixed placeholder
    # phrase rather than being passed to the model as-is.
    if not inputs.replace("\0", "").strip():
        inputs = "Empty query"

    if self.type == "tacotron2":
        # Called with the bare string; the result is unpacked as a 3-tuple
        # whose first element is the mel output.
        mel_output, _, _ = self.model.encode_text(inputs)
    elif self.type == "fastspeech2":
        # Called with a one-element list plus neutral (1.0) pace / pitch /
        # energy factors; the result is unpacked as a 4-tuple.
        mel_output, _, _, _ = self.model.encode_text(
            [inputs], pace=1.0, pitch_rate=1.0, energy_rate=1.0
        )
    else:
        # Without this branch ``mel_output`` would be unbound below and the
        # caller would only see an UnboundLocalError with no useful context.
        raise ValueError(
            f"Unsupported text-to-speech model type: {self.type!r} "
            "(expected 'tacotron2' or 'fastspeech2')"
        )

    waveforms = self.vocoder_model.decode_batch(mel_output).numpy()
    return waveforms, self.sampling_rate