def gen_lang_yamls()

in lm_eval/tasks/translation/utils.py [0:0]


def gen_lang_yamls(output_dir: str, overwrite: bool) -> None:
    """
    Generate a yaml file for each language.

    For every key of ``LANGUAGES`` and every dataset name listed under it, two
    task configs are written, one per translation direction. Each file is named
    ``{lang}_{src}-{tgt}.yaml`` and includes ``wmt_common_yaml``.

    :param output_dir: The directory to output the files to.
    :param overwrite: Whether to overwrite files if they already exist.
    :raises FileExistsError: If ``overwrite`` is false and at least one target
        file already exists. The error is raised only after every file has been
        attempted, and its message lists all the files that were skipped.
    """
    err = []
    for lang, dataset_names in LANGUAGES.items():
        # The group list depends only on `lang`, so build it once per key
        # rather than once per generated file.
        groups = ["generate_until", "translation", lang]
        if lang in gpt3_translation_benchmarks:
            groups.append("gpt3_translation_benchmarks")

        for dataset_name in dataset_names:
            src_lang, _, tgt_lang = dataset_name.partition("-")
            # both translation directions for each lang pair
            for src, tgt in ((src_lang, tgt_lang), (tgt_lang, src_lang)):
                lang_pair = src + "-" + tgt
                file_name = f"{lang}_{lang_pair}.yaml"
                source, target = code_to_language(src), code_to_language(tgt)

                # Jinja-style placeholders; plain (non-f) strings keep the
                # double braces literal.
                src_field = '{{translation["' + src + '"]}}'
                tgt_field = '{{translation["' + tgt + '"]}}'

                config = {
                    "include": "wmt_common_yaml",
                    "group": groups,
                    "dataset_path": lang,
                    # For iwslt2017 the dataset name is written with an
                    # "iwslt2017-" prefix.
                    "dataset_name": (
                        "iwslt2017-" + dataset_name
                        if lang == "iwslt2017"
                        else dataset_name
                    ),
                    "task": f"{lang}-{lang_pair}",
                    "doc_to_text": (
                        f"{source} phrase: " + src_field + f"\n{target} phrase:"
                    ),
                    "doc_to_target": " " + tgt_field,
                }

                try:
                    # Mode "x" (exclusive creation) raises FileExistsError when
                    # the file is already present and overwriting is disabled.
                    with open(
                        f"{output_dir}/{file_name}",
                        "w" if overwrite else "x",
                        encoding="utf8",
                    ) as f:
                        f.write("# Generated by utils.py\n")
                        yaml.dump(config, f)
                except FileExistsError:
                    # Remember the skipped file and keep generating the rest.
                    err.append(file_name)

    if err:
        raise FileExistsError(
            "Files were not created because they already exist (use --overwrite flag):"
            f" {', '.join(err)}"
        )