in pyrit/score/question_answer_scorer.py [0:0]
def report_scores(self, responses: list[PromptRequestResponse]) -> None:
    """
    Print a per-response report and an overall tally of correct answers.

    Only the first score of the first request piece of each response is
    inspected. All responses are validated before anything is printed, so a
    validation failure never leaves a partially printed report behind.

    Args:
        responses (list[PromptRequestResponse]): The list of responses to be reported on

    Raises:
        ValueError: If any response's first request piece has no scores, or
            if any inspected score is not of type "true_false".
    """
    # Resolve the score of interest once per response rather than re-walking
    # response.request_pieces[0].scores[0] for every field that is read.
    first_scores = []
    for response in responses:
        scores = response.request_pieces[0].scores
        if not scores:
            raise ValueError("Not all responses have scores, please score all responses before reporting")
        first_scores.append(scores[0])

    # The type check runs only after every response is known to have a score,
    # so a missing score is always reported in preference to a wrong type.
    if any(score.score_type != "true_false" for score in first_scores):
        raise ValueError("Score types are not 'true_false'")

    correct_count = 0
    for score in first_scores:
        score_metadata = json.loads(score.score_metadata)
        correct_answer = score_metadata["correct_answer"]
        received_answer = score_metadata["scored_answer"]
        print(f"Was answer correct: {score.score_value}")
        print(f"Correct Answer: {correct_answer}")
        print(f"Answer Received: {received_answer}")
        # Compare case-insensitively so that "True", "true" and a real bool
        # are all tallied as correct; an exact match against "True" silently
        # under-counts any other spelling of a true value.
        correct_count += int(str(score.score_value).lower() == "true")
    print(f"Correct / Total: {correct_count} / {len(responses)}")