in voxpopuli/segmentation/run_pyannote_sd.py [0:0]
from pathlib import Path  # module-level imports this excerpt relies on

import torch
from tqdm import tqdm


def main(args):
    # map the special "original" language to an empty language string
    languages = [lang if lang != "original" else "" for lang in args.languages]
    root = Path(args.root)
    audio_paths = []
    for lang in languages:
        audio_paths += get_all_audio_for_lang(root, lang)
    # when not overwriting, keep only files that fail check() against the pyannote config
    if not args.overwrite:
        audio_paths = [x for x in audio_paths if not check(x, args.pyannote_cfg)]
    if args.max_num is not None:
        audio_paths = audio_paths[: args.max_num]
    n_devices = torch.cuda.device_count()
    if n_devices < 2:
        # fewer than two GPUs: process the files sequentially
        for d in audio_paths:
            print(d)
            get(d)
    else:
        # multiple GPUs: split the files into batches of n_devices and spawn one worker per GPU
        batches = list(get_batches(audio_paths, batch_size=n_devices))
        for batch in tqdm(batches):
            torch.multiprocessing.spawn(
                fn=get_multiprocess,
                args=(batch, args.pyannote_cfg, args.segment_min),
                nprocs=n_devices,
            )
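
The helpers get_batches and get_multiprocess are defined elsewhere in the file and are not part of this excerpt. As a rough illustration of the batching step, here is a minimal sketch of a get_batches helper, assuming it simply chunks the collected paths into groups of batch_size; the repo's actual implementation may differ:

from pathlib import Path
from typing import Iterator, List, Sequence


def get_batches(items: Sequence[Path], batch_size: int) -> Iterator[List[Path]]:
    # yield consecutive chunks of at most batch_size paths; the final chunk
    # may be shorter when len(items) is not a multiple of batch_size
    for start in range(0, len(items), batch_size):
        yield list(items[start : start + batch_size])

Note that torch.multiprocessing.spawn invokes its fn as fn(i, *args) with the process index i in [0, nprocs), so get_multiprocess receives the GPU rank as its first argument, followed by the batch, the pyannote config, and the minimum segment length. If get_batches is a plain chunker like the sketch above, the last batch can hold fewer than n_devices paths, so the worker presumably guards against ranks with no corresponding file.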