in sagemaker-voice-classification/notebook/coswara_dataset.py [0:0]
def __getitem__(self, index):
    """Load, resample, fixed-length pad/trim, and decimate one audio sample.

    Steps:
      1. Load the audio file at ``index`` and keep channel ``self.channel``.
      2. Resample from the file's native rate to ``self.new_sr``.
      3. Zero-pad (or truncate) to ``self.new_sr * self.audio_len`` samples.
      4. Decimate by stride ``self.sampling_ratio``.

    Returns:
        (torch.Tensor, label): a ``(1, const_len // sampling_ratio)`` float
        tensor and ``self.labels[index]``.

    NOTE: on a load/processing failure this logs and implicitly returns
    ``None`` — a downstream DataLoader collate function must tolerate that.
    """
    self.curfile = self.file_names[index]
    try:
        waveform, sample_rate = torchaudio.load(self.curfile)
        # Select one channel and resample to the target rate.
        waveform = torchaudio.transforms.Resample(sample_rate, self.new_sr)(
            waveform[self.channel, :].view(1, -1)
        )
        # Zero-pad or truncate to the desired audio length in seconds.
        # (The original performed this identical pad/trim twice; the second
        # pass was dead code and has been removed.)
        const_len = self.new_sr * self.audio_len
        sound = torch.zeros([1, const_len])
        valid = min(waveform.shape[1], const_len)
        sound[0, :valid] = waveform[0, :valid]
        # Decimate by self.sampling_ratio. The original hard-coded stride
        # ``::5``, which only matched the buffer size when sampling_ratio == 5;
        # the trailing [:new_const_len] also guards the ceil-vs-floor length
        # mismatch when const_len is not divisible by the stride.
        new_const_len = const_len // self.sampling_ratio
        soundFormatted = torch.zeros([1, new_const_len])
        soundFormatted[0, :] = sound[0, :: self.sampling_ratio][:new_const_len]
        return soundFormatted, self.labels[index]
    except RuntimeError:
        # torchaudio.load raises RuntimeError for unreadable/corrupt files.
        print("Get Item RuntimeError for file: {}".format(self.curfile))
    except ExecuteUserScriptError:
        # NOTE(review): ExecuteUserScriptError is not defined in this module's
        # visible scope — if it is not imported elsewhere in the file, reaching
        # this clause raises NameError. Verify the import or drop this handler.
        print("Get Item ExecuteUserScriptError for file: {}".format(self.curfile))