in utils/config_generator.py [0:0]
def main() -> None:
    """Generate a training config for a language pair from the production config.

    Parses the command line (SOURCE, TARGET, --name, --remote_branch, --fast),
    validates the inputs, loads the production config with its comments
    stripped, updates it for the requested language pair, and writes the
    result to ``configs/autogenerated/{source}-{target}-{name}.yml``.

    Side effects:
        Sets the module-level ``aug_mix_modifier`` based on whether the
        language pair is CJK, and writes the generated config file to disk.

    Raises:
        SystemExit: With status 1 when any of the source tag, target tag, or
            config name fails validation (argparse itself exits with status 2
            on malformed command lines).
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        # Preserves whitespace in the help text.
        formatter_class=argparse.RawTextHelpFormatter,
    )
    parser.add_argument("source", metavar="SOURCE", type=str, help="The source language tag")
    parser.add_argument("target", metavar="TARGET", type=str, help="The target language tag")
    parser.add_argument(
        "--name",
        metavar="name",
        type=str,
        required=True,
        help="The name of the config, which gets constructed like so: configs/autogenerated/{source}-{target}-{name}.yml",
    )
    parser.add_argument(
        "--remote_branch",
        metavar="REF",
        type=str,
        default="origin/main",
        help="The remote branch that contains the config.prod.yml. Typically origin/main, or origin/release",
    )
    parser.add_argument(
        "--fast",
        action="store_true",
        help="Skip slow network requests like looking up dataset size",
    )
    args = parser.parse_args()

    # Validate the inputs. Every problem is collected first so that the user
    # sees all of them in a single run, then the script aborts before any
    # config is generated from invalid values.
    langtag_re = r"[a-z]{2,3}"
    errors: list[str] = []
    if not re.fullmatch(langtag_re, args.source):
        errors.append("The source language should be a 2 or 3 letter lang tag.")
    if not re.fullmatch(langtag_re, args.target):
        errors.append("The target language should be a 2 or 3 letter lang tag.")
    if not re.fullmatch(r"[\w\d-]+", args.name):
        errors.append(
            "The name of the training config should only contain alphanumeric, underscores, and dashes."
        )
    if errors:
        for message in errors:
            print(message, file=sys.stderr)
        sys.exit(1)

    # ruamel.yaml preserves comments and ordering unlike PyYAML
    yaml = ruamel.yaml.YAML()

    # Load the prod yaml. The existing comments are stripped before parsing;
    # a fresh comment section is applied to the final output below.
    yaml_string = prod_config_path.read_text(encoding="utf-8")
    yaml_string = strip_comments(yaml_string)
    prod_config = yaml.load(StringIO(yaml_string))

    # Published as a module-level global so code outside this function can
    # read the modifier chosen for this language pair.
    global aug_mix_modifier
    aug_mix_modifier = "aug-mix-cjk" if is_cjk(args.source, args.target) else "aug-mix"

    comment_section = update_config(prod_config, args.name, args.source, args.target, args.fast)
    final_config = apply_comments_to_yaml_string(
        yaml, prod_config, comment_section, args.remote_branch
    )

    final_config_path = (
        root_dir / "configs/autogenerated" / f"{args.source}-{args.target}-{args.name}.yml"
    )
    print("Writing config to:", str(final_config_path))
    final_config_path.write_text(final_config, encoding="utf-8")