in avhubert/preparation/musan_prepare.py [0:0]
def make_musan_babble(musan_root, rank, nshard):
    """Generate this shard's babble-noise wav files from MUSAN speech clips.

    For each of the ``train``/``valid``/``test`` splits, the speakers listed
    in ``{musan_root}/speech/spk.{split}`` (one speaker id per line) are
    resolved to their wav files, and a fixed number of mixtures is produced
    (8000 / 1000 / 1000 respectively).  Mixture ``i`` seeds NumPy's global
    RNG with ``i``, picks up to 30 files from the split via a random
    permutation, passes them to ``mix_audio`` and writes the result to
    ``{musan_root}/babble/wav/{split}-{i+1:05d}.wav`` at 16 kHz.

    The mixture indices of a split are divided into ``nshard`` contiguous
    chunks of ``ceil(num_split / nshard)``; only the chunk numbered ``rank``
    (0-based) is generated by this call.

    Args:
        musan_root: Root directory of the MUSAN corpus; must contain
            ``speech/*/*wav`` and the ``speech/spk.{split}`` lists.
        rank: 0-based index of the shard to generate.
        nshard: Total number of shards.

    Raises:
        KeyError: If a speaker listed in a split file has no wav file
            under ``{musan_root}/speech``.
    """
    babble_dir = f"{musan_root}/babble/wav/"
    num_per_mixture = 30
    sample_rate = 16_000
    # Number of mixtures to produce per split (looked up by name below
    # instead of eval()-ing a variable name).
    split_sizes = {'train': 8000, 'valid': 1000, 'test': 1000}
    os.makedirs(babble_dir, exist_ok=True)

    # Group every speech wav by speaker id, taken as the file's base name
    # with its last '-'-separated field removed.
    # NOTE(review): glob order is filesystem-dependent, so the seeded
    # permutation below is only reproducible when the listing order is
    # stable -- confirm whether the file list should be sorted.
    spk2wav = {}
    for wav_fn in tqdm(glob.glob(f"{musan_root}/speech/*/*wav")):
        speaker = '-'.join(os.path.basename(wav_fn).split('-')[:-1])
        spk2wav.setdefault(speaker, []).append(wav_fn)

    for split, num_split in split_sizes.items():
        with open(f"{musan_root}/speech/spk.{split}") as spk_file:
            speakers = [ln.strip() for ln in spk_file]
        wav_fns = []
        for speaker in speakers:
            wav_fns.extend(spk2wav[speaker])
        print(f"{split} -> # speaker {len(speakers)}, # wav {len(wav_fns)}")

        # Contiguous index range owned by this shard, clamped to the valid
        # mixture indices [0, num_split).
        num_per_shard = math.ceil(num_split / nshard)
        start_id = max(num_per_shard * rank, 0)
        end_id = min(num_per_shard * (rank + 1), num_split)
        for i in tqdm(range(start_id, end_id)):
            # Seeding with the mixture index makes the selection for a given
            # index independent of which shard generates it.
            np.random.seed(i)
            perm = np.random.permutation(len(wav_fns))[:num_per_mixture]
            output_fn = f"{babble_dir}/{split}-{str(i + 1).zfill(5)}.wav"
            wav_data = mix_audio([wav_fns[x] for x in perm])
            wavfile.write(output_fn, sample_rate, wav_data)