in utils/helpers.py [0:0]
def load_audio(wave_file: str):
    """
    Load an audio file, convert it to 16kHz mono and normalize its level.

    :param wave_file: .wav file containing the audio input
    :return: 1 x T tensor containing input audio resampled to 16kHz, scaled so that
             its mean absolute amplitude is 0.01; silent or empty audio is returned
             unscaled
    """
    audio, sr = ta.load(wave_file)
    if sr != 16000:
        audio = ta.transforms.Resample(sr, 16000)(audio)
    # downmix multi-channel audio by averaging over the channel dimension
    if audio.shape[0] > 1:
        audio = th.mean(audio, dim=0, keepdim=True)
    # normalize such that energy matches average energy of audio used in training
    mean_abs = th.mean(th.abs(audio))
    # Only rescale when there is signal: an all-zero clip would otherwise become
    # NaN (0 / 0), and an empty clip yields a NaN mean. The comparison is False
    # for NaN, so both cases fall through and return the audio unchanged.
    if mean_abs > 0:
        audio = 0.01 * audio / mean_abs
    return audio