def build_section()

in evals/eval/evaluate.py
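
Builds one section of the markdown evaluation report for a given key: a table of
scores per translator and dataset (with deltas against the bergamot baseline), a
results plot, and, for non-averaged COMET runs, links to pairwise cometcompare
reports.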


from collections import defaultdict
import os
from os.path import exists

# TRANS_ORDER (the translator sort order) and plot_lang_pair are assumed to be
# defined elsewhere in evaluate.py.


def build_section(datasets, key, lines, res_dir, evaluation_engine):
    lines.append(f"\n## {key}\n")
    lines.append(f'| Translator/Dataset | {" | ".join(datasets.keys())} |')
    lines.append(f"| {' | '.join(['---' for _ in range(len(datasets) + 1)])} |")

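    # inverted_formatted holds the formatted table cells per translator,
    # inverted_scores the raw numbers for plotting, and comet_comparisons the
    # per-dataset pairwise comparison summaries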
    inverted_formatted = defaultdict(dict)
    inverted_scores = defaultdict(dict)
    comet_comparisons = defaultdict(dict)
    for dataset_name, translators in datasets.items():
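        # bergamot is the baseline; every other translator's score is shown
        # with an absolute and relative delta against it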
        bergamot_res = translators.get("bergamot")
        reordered = sorted(translators.items(), key=lambda x: TRANS_ORDER[x[0]])

        for translator, score in reordered:
            if score == 0:
                formatted_score = "N/A"
            elif translator != "bergamot" and bergamot_res:
                change_perc = (score - bergamot_res) / bergamot_res * 100
                change = score - bergamot_res
                sign = "+" if change > 0 else ""
                formatted_score = f"{score:.2f} ({sign}{change:.2f}, {sign}{change_perc:.2f}%)"
            else:
                formatted_score = f"{score:.2f}"

            inverted_formatted[translator][dataset_name] = formatted_score
            inverted_scores[translator][dataset_name] = score

        # for non-avg COMET reports, include the pairwise comparison summary
        # if a cometcompare report exists
        cometcompare_path = "{}/{}/{}.{}.cometcompare".format(res_dir, key, dataset_name, key)
        if (
            evaluation_engine == "comet"
            and key != "avg"
            and "{}.{}".format(dataset_name, key) not in comet_comparisons
            and exists(cometcompare_path)
        ):
            with open(cometcompare_path) as cometcompare_file:
                filelines = cometcompare_file.readlines()
            final_report = ""
            for line in filelines:
                # keep only the verdict lines, e.g. "X outperforms Y"
                if "outperforms" in line:
                    final_report += f"- {line}"
            comet_comparisons["{}.{}".format(dataset_name, key)] = final_report

    for translator, scores in inverted_formatted.items():
        # iterate over datasets so the columns stay aligned with the header
        # even if a translator is missing a result for some dataset
        row = " | ".join(scores.get(dataset, "N/A") for dataset in datasets)
        lines.append(f"| {translator} | {row} |")

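    # plot_lang_pair (defined elsewhere in evaluate.py) renders the raw scores
    # to a PNG, which the report links to via a path relative to res_dir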
    img_path = os.path.join(res_dir, "img", f"{key}-{evaluation_engine}.png")
    plot_lang_pair(datasets, inverted_scores, img_path, evaluation_engine)

    img_relative_path = "/".join(img_path.split("/")[-2:])
    lines.append(f"\n![Results]({img_relative_path})")

    printed_header = False
    for dataset in comet_comparisons:
        if not printed_header:
            lines.append("### Comparisons between systems")
            lines.append(
                "*If a comparison is omitted, the systems have equal averages (tie). Click on the dataset for a complete report*"
            )
            printed_header = True

        lines.append(f"#### [{dataset}]({key}/{dataset}.cometcompare)")
        lines.append(f"{comet_comparisons[dataset]}")

    lines.append("---")