in human_eval/evaluation.py [0:0]
def combine_results():
for sample in stream_jsonl(sample_file):
task_id = sample["task_id"]
result = results[task_id].pop(0)
sample["result"] = result[1]["result"]
sample["passed"] = result[1]["passed"]
yield sample