in lm_eval/loggers/wandb_logger.py [0:0]
def _log_results_as_table(self) -> None:
    """Generate and log evaluation results as tables to W&B.

    One table is built from the ``"results"`` section of ``self.results``
    and logged under ``evaluation/eval_results``. When ``self.results``
    also has a ``"groups"`` section, a second table is built from it and
    logged under ``evaluation/group_eval_results``.

    Every row has seven cells, in this order: name, version, filter,
    n-shot, metric, value, stderr.
    """
    columns = [
        "Version",
        "Filter",
        "num_fewshot",
        "Metric",
        "Value",
        "Stderr",
    ]

    def make_table(columns: List[str], key: str = "results"):
        """Build a ``wandb.Table`` from one section of ``self.results``.

        Args:
            columns: Column headers; they must line up with the seven
                cells written per row.
            key: Name of the section of ``self.results`` to tabulate.

        Returns:
            The populated ``wandb.Table``.
        """
        # Local import: wandb is only needed once a table is actually built.
        import wandb

        table = wandb.Table(columns=columns)

        # The traversal below only reads from the results, so no defensive
        # copy is needed. Missing sections fall back to empty mappings
        # instead of failing with an AttributeError on None.
        results = self.results
        versions = results.get("versions") or {}
        n_shots = results.get("n-shot") or {}
        section = results.get(key) or {}

        for name, metrics in section.items():
            # Names listed in self.group_names are skipped unless the
            # "groups" section itself is being tabulated.
            if key != "groups" and name in self.group_names:
                continue

            version = versions.get(name)
            if version == "N/A":
                version = None
            n_shot = n_shots.get(name)

            for metric_key, value in metrics.items():
                # Keys have the form "<metric>,<filter>".
                metric, _, filter_name = metric_key.partition(",")
                # Stderr entries are emitted next to their metric, and
                # "alias" is not a metric at all.
                if metric.endswith("_stderr") or metric == "alias":
                    continue

                stderr_key = metric + "_stderr" + "," + filter_name
                if stderr_key in metrics:
                    stderr = metrics[stderr_key]
                    if stderr != "N/A":
                        stderr = "%.4f" % stderr
                    stderr_cell = str(stderr)
                else:
                    stderr_cell = ""

                table.add_data(
                    name,
                    version,
                    filter_name,
                    n_shot,
                    metric,
                    str(value),
                    stderr_cell,
                )

        return table

    # log the complete eval result to W&B Table
    table = make_table(["Tasks"] + columns, "results")
    self.run.log({"evaluation/eval_results": table})

    if "groups" in self.results:
        table = make_table(["Groups"] + columns, "groups")
        self.run.log({"evaluation/group_eval_results": table})