in project/paperbench/paperbench/nano/utils.py
def gather_eval_runs(results: list["PaperBenchResult"], n_runs: int) -> list[EvaluationRun]:
"""
    Gathers successfully graded results of nano/eval into a list of n_runs EvaluationRuns,
    where a single EvaluationRun contains at most one evaluation of each paper.
"""
seed_to_eval_run = {
seed: EvaluationRun(seed=seed, paper_evaluations={}) for seed in range(n_runs)
}
    # Next seed (run slot) to assign for each paper, so repeated evaluations of
    # the same paper land in different EvaluationRuns. If results is empty, the
    # loop below is a no-op and the empty runs are returned as-is.
    paper_to_cur_seed: dict[str, int] = {}
for result in results:
        # Skip results that were not successfully graded by the judge.
        if result.judge_output is None or result.judge_output.graded_task_tree is None:
            continue
paper_id = result.paper_id
        # Assign this result to the paper's next unused run slot. This assumes at
        # most n_runs graded results per paper; any extra result would raise a
        # KeyError on the seed_to_eval_run lookup below.
        seed = paper_to_cur_seed.get(paper_id, 0)
        paper_to_cur_seed[paper_id] = seed + 1
paper_eval = PaperEvaluation(
paper_id=paper_id,
graded_task_node=result.judge_output.graded_task_tree,
paper_run_id=result.run_id,
)
seed_to_eval_run[seed].paper_evaluations[paper_id] = paper_eval
return list(seed_to_eval_run.values())
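
A minimal usage sketch follows, assuming Python 3.10+. The dataclasses below are
hypothetical stand-ins, not the real PaperBench types; they model only the
attributes and constructor fields that gather_eval_runs touches, so the function
can be exercised standalone. With two graded results for "paper-a" and one for
"paper-b", the round-robin assignment puts one evaluation of each paper in run 0
and the second "paper-a" evaluation in run 1.

from dataclasses import dataclass
from typing import Any

@dataclass
class EvaluationRun:  # stand-in for the real EvaluationRun
    seed: int
    paper_evaluations: dict[str, "PaperEvaluation"]

@dataclass
class PaperEvaluation:  # stand-in for the real PaperEvaluation
    paper_id: str
    graded_task_node: Any
    paper_run_id: str

@dataclass
class JudgeOutput:  # stand-in for the real judge output type
    graded_task_tree: Any | None

@dataclass
class PaperBenchResult:  # stand-in for the real PaperBenchResult
    paper_id: str
    run_id: str
    judge_output: JudgeOutput | None

# Two graded results for "paper-a", one for "paper-b", and one ungraded result.
results = [
    PaperBenchResult("paper-a", "run-1", JudgeOutput(graded_task_tree={"score": 1.0})),
    PaperBenchResult("paper-a", "run-2", JudgeOutput(graded_task_tree={"score": 0.5})),
    PaperBenchResult("paper-b", "run-3", JudgeOutput(graded_task_tree={"score": 0.8})),
    PaperBenchResult("paper-b", "run-4", None),  # ungraded: skipped
]

eval_runs = gather_eval_runs(results, n_runs=2)
# Run 0 holds one evaluation of each paper; run 1 holds only the second
# evaluation of "paper-a", since "paper-b" has a single graded result.
assert set(eval_runs[0].paper_evaluations) == {"paper-a", "paper-b"}
assert set(eval_runs[1].paper_evaluations) == {"paper-a"}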