in src/datatuner/lm/metrics.py [0:0]
def get_e2e_metrics(all_predictions, all_references):
    """Score predictions with the external ``measure_scores.py`` script.

    Predictions and references are written to two temporary text files, the
    script at ``E2E_METRICS_FOLDER / "measure_scores.py"`` is run on them with
    ``PYTHON_BIN``, and the ``"<name>: <value>"`` lines it prints on stdout
    are parsed into a dictionary.

    Args:
        all_predictions: Sequence of predicted strings, one per example.
        all_references: Sequence of reference lists; ``all_references[j][i]``
            is the j-th reference string for the i-th prediction. References
            that are empty or whitespace-only are not written.

    Returns:
        Dict mapping metric name to float. ``BLEU``, ``METEOR`` and
        ``ROUGE_L`` are multiplied by 100, and ``BLEU`` is stored under the
        key ``e2e_BLEU``. The dict is empty if the script printed no scores.
    """
    # Function-scope imports keep this block self-contained.
    import logging
    import shutil

    tempdir = Path(mkdtemp())
    try:
        human = tempdir / "human_refs.txt"
        system = tempdir / "system.txt"
        # The script's output is decoded as UTF-8 below; write the inputs as
        # UTF-8 too rather than relying on the platform's locale encoding.
        # NOTE(review): presumably the scorer reads its inputs as UTF-8 - confirm.
        with open(human, "w", encoding="utf-8") as h, open(system, "w", encoding="utf-8") as s:
            for i, prediction in enumerate(all_predictions):
                s.write(prediction + "\n")
                for references in all_references:
                    reference = references[i]
                    if reference.strip():
                        h.write(reference + "\n")
                # A blank line terminates the group of references belonging
                # to one prediction.
                h.write("\n")

        script = E2E_METRICS_FOLDER / "measure_scores.py"
        print(script)
        p = Popen(
            [
                PYTHON_BIN,
                str(script),
                str(human),
                str(system),
            ],
            stdin=PIPE,
            stdout=PIPE,
            stderr=PIPE,
        )
        output, err = p.communicate()
    finally:
        # mkdtemp() leaves deletion to the caller; without this every call
        # would leak a directory containing both files.
        shutil.rmtree(tempdir, ignore_errors=True)

    if p.returncode != 0:
        # Keep the best-effort behaviour (parse whatever was printed), but do
        # not hide the failure: surface the script's stderr for diagnosis.
        logging.getLogger(__name__).warning(
            "measure_scores.py exited with code %s: %s",
            p.returncode,
            err.decode("utf-8", errors="replace").strip(),
        )

    stats_dict = {}
    # splitlines() + strip() also copes with "\r\n" line endings.
    for line in output.decode("utf-8").splitlines():
        line = line.strip()
        if line in ("", "==============", "SCORES:"):
            continue
        key, sep, raw_value = line.partition(": ")
        if not sep:
            # Not a "<name>: <value>" line; ignore stray output.
            continue
        try:
            value = float(raw_value)
        except ValueError:
            continue
        if key in ("BLEU", "METEOR", "ROUGE_L"):
            value *= 100
        if key == "BLEU":
            key = "e2e_BLEU"
        stats_dict[key] = value
    return stats_dict