def get_e2e_metrics()

in src/datatuner/lm/metrics.py [0:0]


def get_e2e_metrics(all_predictions, all_references):
    """Score predictions with the external E2E ``measure_scores.py`` script.

    The predictions and their references are written to two temporary text
    files, the script located at ``E2E_METRICS_FOLDER / "measure_scores.py"``
    is run with ``PYTHON_BIN``, and the ``key: value`` lines it prints on
    stdout are parsed into a dictionary.

    Args:
        all_predictions: Sequence of predicted strings, one per example.
        all_references: Sequence of reference sets, indexed as
            ``all_references[j][i]`` for the j-th reference of the i-th
            prediction. Entries that are empty or whitespace-only are skipped.

    Returns:
        A dict mapping metric name to a float. ``BLEU``, ``METEOR`` and
        ``ROUGE_L`` are multiplied by 100, and ``BLEU`` is stored under the
        key ``e2e_BLEU``. The dict is empty when the script prints no scores.

    Raises:
        ValueError: If a remaining stdout line is not of the form
            ``"<key>: <float>"``.
    """
    # Function-scope imports keep this block self-contained.
    import shutil
    import warnings

    tempdir = Path(mkdtemp())
    try:
        human = tempdir / "human_refs.txt"
        system = tempdir / "system.txt"
        # Written as UTF-8 to match how the scorer's output is decoded below.
        # NOTE(review): presumably the scorer also reads its inputs as UTF-8
        # -- confirm against measure_scores.py.
        with open(human, "w", encoding="utf-8") as h, open(
            system, "w", encoding="utf-8"
        ) as s:
            for i, prediction in enumerate(all_predictions):
                s.write(prediction + "\n")
                for reference_set in all_references:
                    reference = reference_set[i]
                    if reference.strip():
                        h.write(reference + "\n")
                # A blank line terminates the references of one prediction.
                h.write("\n")

        script = E2E_METRICS_FOLDER / "measure_scores.py"
        print(script)
        p = Popen(
            [PYTHON_BIN, str(script), str(human), str(system)],
            stdin=PIPE,
            stdout=PIPE,
            stderr=PIPE,
        )
        output, err = p.communicate()
    finally:
        # mkdtemp() leaves cleanup to the caller; without this every call
        # leaks a directory holding both files.
        shutil.rmtree(tempdir, ignore_errors=True)

    if p.returncode != 0:
        # Surface scorer failures instead of silently returning no metrics;
        # a warning (not an exception) keeps the previous return behavior.
        warnings.warn(
            f"measure_scores.py exited with status {p.returncode}: "
            f"{err.decode('utf-8', errors='replace').strip()}",
            RuntimeWarning,
        )

    # Banner and separator lines emitted by the scorer around the scores.
    ignored_lines = {"", "==============", "SCORES:"}
    scaled_metrics = {"BLEU", "METEOR", "ROUGE_L"}

    stats_dict = {}
    # splitlines() also handles "\r\n" output, which split("\n") would not.
    for line in output.decode("utf-8").splitlines():
        if line in ignored_lines:
            continue
        key, value = line.split(": ")
        value = float(value)
        if key in scaled_metrics:
            value *= 100
        if key == "BLEU":
            key = "e2e_BLEU"
        stats_dict[key] = value

    return stats_dict