in browsecomp_eval.py [0:0]
def grade_sample(self, question: str, correct_answer: str, response: str) -> str:
    """Ask the grader model whether ``response`` answers ``question`` correctly.

    Fills ``GRADER_TEMPLATE`` with the question, the reference answer and the
    response under test, sends the result to ``self.grader_model`` as a single
    user message, and extracts the verdict from the grader's reply.

    Args:
        question: The question that was posed.
        correct_answer: The reference answer to grade against.
        response: The response being graded.

    Returns:
        ``"yes"`` or ``"no"``, taken from the first ``correct: yes`` /
        ``correct: no`` marker found in the grader's reply. Falls back to
        ``"no"`` when the reply contains no such marker.
    """
    grader_prompt = GRADER_TEMPLATE.format(
        question=question,
        correct_answer=correct_answer,
        response=response,
    )
    prompt_messages = [
        self.grader_model._pack_message(content=grader_prompt, role="user")
    ]
    # NOTE(review): assumes calling the grader model returns the reply text
    # as a str -- confirm against the sampler implementation.
    grading_response = self.grader_model(prompt_messages)
    match = re.search(r"correct: (yes|no)", grading_response)
    # Return only the captured verdict. group(0) would be the whole
    # "correct: yes" text, which does not share a format with the bare "no"
    # fallback and can never compare equal to "yes".
    return match.group(1) if match else "no"