in avhubert/hubert_dataset.py [0:0]
def add_noise(self, clean_wav):
    """Mix noise from ``self.select_noise()`` into ``clean_wav`` at a target SNR.

    The noise is repeated or truncated to the length of ``clean_wav``, scaled
    so that the clean-to-noise RMS ratio matches the requested SNR (in dB),
    and added to the clean signal. If the mixture exceeds the int16 range it
    is scaled down so that its largest peak lands on the int16 limit.

    ``self.noise_snr`` is either a single number (fixed SNR) or a
    ``(low, high)`` tuple/list of integers from which an integer SNR is
    drawn uniformly, both ends inclusive.

    Args:
        clean_wav: NumPy array holding the clean waveform.
            NOTE(review): the length handling and peak checks assume a 1-D
            array -- confirm against callers.

    Returns:
        ``np.int16`` array with the same length as ``clean_wav``. When the
        selected noise is empty or all zeros, the clean signal is returned
        without any noise added.

    Raises:
        TypeError: if ``self.noise_snr`` is neither a number nor a
            tuple/list.
    """
    clean_wav = clean_wav.astype(np.float32)
    # Same float32 treatment as the clean signal, so np.square below cannot
    # overflow if select_noise() hands back integer PCM samples.
    noise_wav = np.asarray(self.select_noise(), dtype=np.float32)

    noise_snr = self.noise_snr
    if isinstance(noise_snr, (int, float, np.integer, np.floating)):
        snr = noise_snr
    elif isinstance(noise_snr, (tuple, list)):
        # randint's upper bound is exclusive; +1 makes the range inclusive.
        snr = np.random.randint(noise_snr[0], noise_snr[1] + 1)
    else:
        raise TypeError(
            "noise_snr must be a number or a (low, high) pair, got "
            f"{type(noise_snr).__name__}"
        )

    if len(clean_wav) == 0:
        # Nothing to mix, and max()/min() below would fail on an empty array.
        return clean_wav.astype(np.int16)

    noise_rms = 0.0
    if len(noise_wav) > 0:
        if len(noise_wav) < len(clean_wav):
            # Repeat the noise until it is at least as long as the signal.
            repeats = int(np.ceil(len(clean_wav) / len(noise_wav)))
            noise_wav = np.concatenate([noise_wav] * repeats)
        noise_wav = noise_wav[:len(clean_wav)]
        noise_rms = np.sqrt(np.mean(np.square(noise_wav), axis=-1))

    if noise_rms > 0:
        clean_rms = np.sqrt(np.mean(np.square(clean_wav), axis=-1))
        # SNR(dB) = 20 * log10(clean_rms / noise_rms), solved for noise_rms.
        adjusted_noise_rms = clean_rms / (10 ** (snr / 20))
        mixed = clean_wav + noise_wav * (adjusted_noise_rms / noise_rms)
    else:
        # Empty or silent noise (this test is also False for a NaN RMS):
        # scaling would divide by zero and fill the output with NaNs.
        mixed = clean_wav

    # Avoid clipping: rescale so the dominant peak sits on the int16 limit.
    max_int16 = np.iinfo(np.int16).max
    min_int16 = np.iinfo(np.int16).min
    peak_high = mixed.max(axis=0)
    peak_low = mixed.min(axis=0)
    if peak_high > max_int16 or peak_low < min_int16:
        if peak_high >= abs(peak_low):
            reduction_rate = max_int16 / peak_high
        else:
            reduction_rate = min_int16 / peak_low
        mixed = mixed * reduction_rate
    return mixed.astype(np.int16)