def main()

in voxpopuli/segmentation/get_segment_pyannote_speaker.py [0:0]


def main():
    """Parse the command-line options and launch the speaker-based cutting.

    Options are read from ``sys.argv``. Two defaults are resolved after
    parsing: ``--output`` falls back to the value of ``--root`` and
    ``--languages`` falls back to every value of ``LangCode``. The resulting
    namespace is then handed to ``get_all``.
    """
    # The text must be passed as ``description``: the first positional
    # parameter of ArgumentParser is ``prog``, which would put the whole
    # sentence in the usage line in place of the program name.
    parser = argparse.ArgumentParser(
        description="Cut the data by speaker. "
        "run_pyannote_sd.py must have been run before"
    )
    parser.add_argument("--root", type=str, required=True, help="Input root directory")
    parser.add_argument(
        "-o",
        "--output",
        type=str,
        default=None,
        help="Output directory, if different from the input one",
    )
    parser.add_argument(
        "--languages",
        type=str,
        nargs="*",
        help="If given, translated data to deal with",
    )
    parser.add_argument(
        "--max-num",
        default=None,
        type=int,
        help="If given, maximum number of session to deal with",
    )
    parser.add_argument("--nproc", default=8, type=int, help="Number of processes")
    parser.add_argument(
        "--pyannote-cfg",
        default="dia_ami",
        type=str,
        choices=["dia", "dia_ami", "sad_ami"],
        help="Name of the pyannote configuration",
    )
    parser.add_argument(
        "--min-duration",
        default=1.0,
        type=float,
        help="Ignore all speaker segments lasting less than the given number of seconds",
    )
    parser.add_argument(
        "--no-vad",
        action="store_true",
        help="Does not apply the vad after the speaker segmentation",
    )
    parser.add_argument(
        "--min-dur-vad",
        default=15,
        type=int,
        help="Min size of a sequence (in seconds) after applying the vad.",
    )
    parser.add_argument(
        "--max-dur-vad",
        default=30,
        type=int,
        help="Max size of a sequence (in seconds) after applying the vad.",
    )
    parser.add_argument(
        "--max-silence-vad",
        default=1.5,
        type=float,
        help="Maximum length of a silence allowed in the voice activity detection"
        " (the lower the stricter)",
    )
    args = parser.parse_args()

    # Write next to the input data unless a separate output directory is given.
    if args.output is None:
        args.output = args.root

    # No --languages option at all means "every language known to LangCode".
    if args.languages is None:
        args.languages = [x.value for x in LangCode]

    get_all(args)