in project/paperbench/paperbench/metrics.py [0:0]
def parse_entry_by_format(entry: dict, format: Literal["old", "new"]) -> ParsedEntry | None:
    """Convert one raw log entry into a ``ParsedEntry``.

    Two entry layouts are understood:

    * ``"old"``: the result lives at ``entry["data"]["pb_result"]`` and the
      graded tree at ``["grader_output"]["graded_task_tree"]``. Entries whose
      ``grader_success`` flag is missing or falsy are skipped.
    * ``"new"``: the result lives at
      ``entry["data"]["pb_result"]["paperbench_result"]`` and the graded tree
      at ``["judge_output"]["graded_task_tree"]``.

    Args:
        entry: The raw entry; must provide ``"data"`` and ``"timestamp"``.
        format: Which layout ``entry`` follows, ``"old"`` or ``"new"``.

    Returns:
        The parsed entry, or ``None`` for an old-format entry whose grading
        did not succeed.

    Raises:
        ValueError: If ``format`` is neither ``"old"`` nor ``"new"``.
        KeyError: If a key read from ``entry`` is missing.
    """
    # Reject unknown formats up front; otherwise neither branch below binds
    # ``pb_result`` / ``graded_task_tree`` and the function would die later
    # with an uninformative UnboundLocalError.
    if format not in ("old", "new"):
        raise ValueError(
            f"Unsupported entry format {format!r}; expected 'old' or 'new'"
        )

    if format == "old":
        pb_result = entry["data"]["pb_result"]
        if not pb_result.get("grader_success"):
            # Grading did not succeed, so there is no tree to parse.
            return None
        graded_task_tree = GradedTaskNode.from_dict(
            pb_result["grader_output"]["graded_task_tree"]
        )
    else:  # format == "new"
        pb_result = entry["data"]["pb_result"]["paperbench_result"]
        graded_task_tree = GradedTaskNode.from_dict(
            pb_result["judge_output"]["graded_task_tree"]
        )

    run_group_id = entry["data"]["run_group_id"]
    paper_run_id = entry["data"]["run_id"]
    # The agent id is the last underscore-separated token of the run group id.
    agent_id = run_group_id.split("_")[-1]
    paper_id = pb_result["paper_id"]
    # Store the entry time as a POSIX timestamp (float seconds).
    timestamp = dateutil.parser.parse(entry["timestamp"]).timestamp()

    return ParsedEntry(
        agent_id=agent_id,
        paper_id=paper_id,
        paper_run_id=paper_run_id,
        timestamp=timestamp,
        graded_task_tree=graded_task_tree,
    )