in voxpopuli/segmentation/get_segment_pyannote_speaker.py [0:0]
def main() -> None:
    """Parse the command-line arguments and forward them to ``get_all``.

    Two options are post-processed before the call:

    * ``--output`` falls back to the value of ``--root`` when it is omitted.
    * ``--languages`` falls back to the value of every ``LangCode`` member
      when it is omitted.

    The resulting ``argparse.Namespace`` is passed unchanged to ``get_all``.
    """
    # The text must be given as ``description``: the first positional
    # parameter of ``ArgumentParser`` is ``prog``, which would replace the
    # program name in the usage line instead of describing the tool.
    parser = argparse.ArgumentParser(
        description=(
            "Cut the data by speaker. run_pyanote_sd.py must have been run before"
        )
    )

    # Input / output locations.
    parser.add_argument(
        "--root", type=str, required=True, help="Input root directory"
    )
    parser.add_argument(
        "-o",
        "--output",
        type=str,
        default=None,
        help="Output directory, if different from the input one",
    )

    # Selection of the data to process.
    parser.add_argument(
        "--languages",
        type=str,
        nargs="*",
        help="If given, translated data to deal with",
    )
    parser.add_argument(
        "--max-num",
        default=None,
        type=int,
        help="If given, maximum number of session to deal with",
    )
    parser.add_argument(
        "--nproc", default=8, type=int, help="Number of processes"
    )

    # Speaker segmentation settings.
    parser.add_argument(
        "--pyannote-cfg",
        default="dia_ami",
        type=str,
        choices=["dia", "dia_ami", "sad_ami"],
    )
    parser.add_argument(
        "--min-duration",
        default=1.0,
        type=float,
        help="Ignore all speaker segments lasting less than the given number of seconds",
    )

    # Voice activity detection settings.
    parser.add_argument(
        "--no-vad",
        action="store_true",
        help="Does not apply the vad after the speaker segmentation",
    )
    parser.add_argument(
        "--min-dur-vad",
        default=15,
        type=int,
        help="Min size of a sequence (in seconds) after applying the vad.",
    )
    parser.add_argument(
        "--max-dur-vad",
        default=30,
        type=int,
        help="Max size of a sequence (in seconds) after applying the vad.",
    )
    parser.add_argument(
        "--max-silence-vad",
        default=1.5,
        type=float,
        help="Maximum length of a silence allowed in the voice activity detection"
        " (the lower the stricter)",
    )

    args = parser.parse_args()

    # Write next to the input data unless another directory was requested.
    if args.output is None:
        args.output = args.root

    # No explicit selection: consider every language known to LangCode.
    if args.languages is None:
        args.languages = [x.value for x in LangCode]

    get_all(args)