def make_musan_babble()

in avhubert/preparation/musan_prepare.py [0:0]


def make_musan_babble(musan_root, rank, nshard):
    """Generate this shard's portion of the MUSAN babble-noise mixtures.

    Speech files matching ``{musan_root}/speech/*/*wav`` are grouped by
    speaker, where the speaker id is the file's basename with its last
    ``-``-separated field removed. For each of the ``train``, ``valid`` and
    ``test`` splits, the speaker ids are read from
    ``{musan_root}/speech/spk.{split}`` (one per line) and each mixture is
    built by passing up to ``num_per_mixture`` randomly selected files of
    those speakers to ``mix_audio``. Results are written to
    ``{musan_root}/babble/wav/{split}-NNNNN.wav`` where ``NNNNN`` is the
    1-based, zero-padded mixture index.

    The mixture indices of every split are divided into ``nshard``
    contiguous chunks and only chunk number ``rank`` is produced by this
    call, so independent workers can share the job.

    Args:
        musan_root: Root directory of the MUSAN corpus.
        rank: Zero-based index of the chunk handled by this call.
        nshard: Total number of chunks the work is divided into.

    Raises:
        KeyError: If a speaker listed in ``spk.{split}`` has no matching
            speech file.
    """
    babble_dir = f"{musan_root}/babble/wav/"
    num_per_mixture = 30
    sample_rate = 16_000
    # Number of mixtures per split; a mapping replaces the former
    # ``eval(f"num_{split}")`` lookup of local variables.
    num_mixtures = {'train': 8000, 'valid': 1000, 'test': 1000}
    os.makedirs(babble_dir, exist_ok=True)

    # NOTE(review): glob returns paths in filesystem order, so the indices
    # drawn below map to the same files only where that order is the same.
    spk2wav = {}
    for wav_fn in tqdm(glob.glob(f"{musan_root}/speech/*/*wav")):
        speaker = '-'.join(os.path.basename(wav_fn).split('-')[:-1])
        spk2wav.setdefault(speaker, []).append(wav_fn)

    for split, num_split in num_mixtures.items():
        # Close the list file promptly and ignore blank lines, which would
        # otherwise be looked up as the speaker ''.
        with open(f"{musan_root}/speech/spk.{split}") as spk_file:
            speakers = [ln.strip() for ln in spk_file if ln.strip()]
        wav_fns = []
        for x in speakers:
            wav_fns.extend(spk2wav[x])
        print(f"{split} -> # speaker {len(speakers)}, # wav {len(wav_fns)}")

        # Visit only the indices owned by this rank, clamped to the valid
        # range [0, num_split), instead of scanning and skipping the rest.
        num_per_shard = math.ceil(num_split / nshard)
        start_id = max(num_per_shard * rank, 0)
        end_id = min(num_per_shard * (rank + 1), num_split)
        for i in tqdm(range(start_id, end_id)):
            # Seeding with the mixture index makes the drawn permutation a
            # function of the index alone, independent of rank/nshard.
            np.random.seed(i)
            perm = np.random.permutation(len(wav_fns))[:num_per_mixture]
            output_fn = os.path.join(babble_dir, f"{split}-{str(i+1).zfill(5)}.wav")
            wav_data = mix_audio([wav_fns[x] for x in perm])
            wavfile.write(output_fn, sample_rate, wav_data)
    return