in common.py [0:0]
def aggregate_results( single_eval_results: list[SingleEvalResult], default_stats: tuple[str] = ("mean", "std"), name2stats: dict[str, tuple[str]] | None = None,