def get_all()

in voxpopuli/segmentation/get_segment_pyannote_speaker.py [0:0]


def get_all(args):
    """Segment the audio files of every requested language in parallel.

    The audio paths of each language in ``args.languages`` are gathered
    under ``args.root`` (optionally truncated to the first ``args.max_num``
    entries), then handed to ``FileSegmenter.split_audio`` through a pool of
    ``args.nproc`` worker processes. The number of reported successes is
    printed at the end.

    Args:
        args: Namespace-like object providing ``root``, ``output``,
            ``languages``, ``max_num``, ``nproc``, ``pyannote_cfg``,
            ``min_duration``, ``no_vad``, ``min_dur_vad``, ``max_dur_vad``
            and ``max_silence_vad``.
    """
    base_dir = Path(args.root)
    audio_paths = [
        path
        for lang in args.languages
        for path in get_all_audio_for_lang(base_dir, lang)
    ]

    # ``max_num`` caps the amount of work; ``None`` means "process everything".
    limit = args.max_num
    if limit is not None:
        audio_paths = audio_paths[:limit]

    segmenter = FileSegmenter(
        args.root,
        args.output,
        pyannote_cfg=args.pyannote_cfg,
        min_duration=args.min_duration,
        split_vad=not args.no_vad,
        min_dur_vad=args.min_dur_vad,
        max_dur_vad=args.max_dur_vad,
        max_silence_vad=args.max_silence_vad,
    )

    n_files = len(audio_paths)
    with Pool(args.nproc) as pool:
        # Completion order is irrelevant here: each result is only converted
        # with int() and added to the running total.
        outcomes = pool.imap_unordered(segmenter.split_audio, audio_paths)
        found = sum(int(outcome) for outcome in tqdm(outcomes, total=n_files))

    print(f"{found} audio data segmented")