def main()

in utils/merge_generative.py [0:0]


def main():
    """Merge every evaluation JSON under a directory into ``merged.json``.

    The directory is read from ``sys.argv[1]``. An existing ``merged.json``
    inside it is deleted first, then each file returned by ``find_all_json``
    is folded into one nested mapping::

        results[task_key][num_fewshot][prompt_or_template_name] = {...}

    Files are dispatched on their file name:

    * ``agg*``  -- skipped without being parsed.
    * ``slim*`` -- parsed as bigscience/lm-eval-harness output; for each
      entry of ``data["results"]`` only the ``int``/``float`` fields are kept
      and merged into whatever is already stored for that prompt.
    * anything else -- parsed as bigscience/t-zero output and stored whole
      under few-shot count ``0``.

    The mapping is passed through ``sort_dict`` and dumped as JSON.

    Raises:
        SystemExit: if no directory argument was supplied.
        ValueError: if two t-zero files report the same dataset/config and
            template name (the second would otherwise silently overwrite
            the first).
    """
    if len(sys.argv) < 2:
        sys.exit("error: expected the results directory as the first argument")

    root_dir = Path(sys.argv[1])
    out_path = root_dir / "merged.json"
    # Drop stale output before scanning -- presumably so find_all_json does
    # not feed a previous merge back in as input (verify against its filter).
    if out_path.exists():
        out_path.unlink()

    # Translation directions that share a task name but are not comparable.
    # Checked in this order; the first one found in the prompt name wins.
    language_pairs = ("en-fr", "fr-en", "hi-en", "en-hi")

    results = {}
    for json_file in find_all_json(root_dir):
        file_name = json_file.name

        if file_name.startswith("agg"):
            print(f"Skipping {json_file} from bigscience/lm-eval-harness.")
            continue

        with open(json_file, "r", encoding="utf-8") as fi:
            data = json.load(fi)

        if file_name.startswith("slim"):
            print(f"Parsing {json_file} as bigscience/lm-eval-harness file.")
            fewshots = data["config"]["num_fewshot"]
            for entry in data["results"]:
                prompt_name = entry["prompt_name"]
                key = entry["task_name"]
                pair = next(
                    (p for p in language_pairs if p in prompt_name), None
                )
                if pair is not None:
                    key += f"_{pair}"

                # Exact type match on purpose: isinstance() would also admit
                # bool (an int subclass) and change the merged output.
                numeric_fields = {
                    name: value
                    for name, value in entry.items()
                    if type(value) in (int, float)
                }
                prompts = results.setdefault(key, {}).setdefault(fewshots, {})
                prompts.setdefault(prompt_name, {}).update(numeric_fields)
        else:
            print(f"Parsing {json_file} as bigscience/t-zero file.")
            key = f"{data['dataset_name']}_{data['dataset_config_name']}"
            fewshots = 0
            template_name = data["template_name"]
            templates = results.setdefault(key, {}).setdefault(fewshots, {})
            # The duplicate check must look at this dataset's templates, not
            # at the top-level task keys.
            if template_name in templates:
                raise ValueError(
                    f"Duplicate result for template {template_name!r} of "
                    f"{key!r} found in {json_file}"
                )
            templates[template_name] = data

    # sort
    sorted_results = sort_dict(results)

    # write
    with open(out_path, "w") as fo:
        json.dump(sorted_results, fo)