in utils/find_corpus.py [0:0]
def main(args_list: Optional[list[str]] = None) -> None:
    """Parse the command line and run the requested corpus importers.

    Both positional language codes are required; when either one is missing
    the help text is printed and the process exits with status 1. An
    ``--importer`` value that is not in the known list also exits with
    status 1. When ``--importer`` is omitted, every importer dispatched
    below runs, in the order listed.

    Args:
        args_list: Argument strings handed to ``parser.parse_args``. With
            ``None``, argparse reads the process command line instead.
    """
    importers = [
        "opus",
        "sacrebleu",
        "mtdata",
        "huggingface_mono",
        "huggingface_parallel",
        "huggingface_any",
        "news-crawl",
        "hplt-mono",
    ]

    parser = argparse.ArgumentParser(
        description=__doc__,
        # RawTextHelpFormatter preserves whitespace in the help text.
        formatter_class=argparse.RawTextHelpFormatter,
    )
    # The positionals are optional for argparse (nargs="?") so that a missing
    # language can be answered with the full help text further down.
    parser.add_argument("source", type=str, nargs="?", help="Source language code")
    parser.add_argument("target", type=str, nargs="?", help="Target language code")
    parser.add_argument(
        "--importer",
        type=str,
        help=f"The importer to use: {', '.join(importers)}",
    )
    parser.add_argument(
        "--download_url",
        action="store_true",
        default=False,
        help="Show the download url if available.",
    )

    args = parser.parse_args(args_list)

    if not (args.source and args.target):
        parser.print_help()
        sys.exit(1)

    if args.importer and args.importer not in importers:
        print(f'"{args.importer}" is not a valid importer.')
        sys.exit(1)

    # The monolingual lookups take a single language: the target when the
    # source is "en", otherwise the source.
    mono_lang = args.target if args.source == "en" else args.source

    # Ordered (importer name, action) pairs. The lambdas defer the helper
    # lookups and calls until an importer is actually selected.
    # NOTE(review): "hplt-mono" is accepted by the validation above, but no
    # dispatch entry for it is visible in this part of the function - confirm
    # that it is handled elsewhere.
    dispatch = (
        ("opus", lambda: get_opus(args.source, args.target, args.download_url)),
        ("sacrebleu", lambda: get_sacrebleu(args.source, args.target)),
        ("mtdata", lambda: get_mtdata(args.source, args.target)),
        ("huggingface_mono", lambda: get_huggingface_monolingual(mono_lang)),
        ("huggingface_parallel", lambda: get_huggingface_parallel(args.source, args.target)),
        ("huggingface_any", lambda: get_huggingface_any(mono_lang)),
        ("news-crawl", lambda: get_news_crawl(args.source, args.target)),
    )
    for importer_name, run_importer in dispatch:
        # No --importer means "run everything"; otherwise run only the match.
        if not args.importer or args.importer == importer_name:
            run_importer()