in 3_optimization-design-ptn/03_prompt-optimization/promptwizard/glue/promptopt/instantiate.py [0:0]
def evaluate(self, test_dataset_jsonl: str) -> float:
    """
    Evaluate the performance of self.BEST_PROMPT over the test dataset and return the accuracy.

    :param test_dataset_jsonl: Path to a .jsonl file holding the test dataset
    :return: Fraction of test examples answered correctly
    """
    start_time = time.time()
    self.logger.info(f"Evaluation started {CommonLogsStr.LOG_SEPERATOR}")
    if not self.BEST_PROMPT:
        self.logger.error(
            "BEST_PROMPT attribute is not set. Please set the self.BEST_PROMPT attribute of this object, "
            "either manually or by calling the get_best_prompt() method."
        )
        return

    total_correct = 0
    total_count = 0
    for json_obj in read_jsonl_row(test_dataset_jsonl):
        # Answer the question using BEST_PROMPT and score the prediction
        # against the ground-truth answer for this example.
        answer = self.predict_and_access(
            json_obj[DatasetSpecificProcessing.QUESTION_LITERAL],
            json_obj[DatasetSpecificProcessing.FINAL_ANSWER_LITERAL],
        )
        total_correct += answer[self.EvalLiterals.IS_CORRECT]
        total_count += 1
        # Log the running accuracy alongside the latest prediction.
        result = {
            "accuracy": f"{total_correct}/{total_count} : {total_correct / total_count:.2%}",
            "predicted": answer[self.EvalLiterals.PREDICTED_ANS],
            "actual": json_obj[DatasetSpecificProcessing.FINAL_ANSWER_LITERAL],
        }
        self.iolog.append_dict_to_chained_logs(result)
        self.logger.info(result)

    # Persist the per-example evaluation log and report the overall accuracy.
    self.iolog.dump_chained_log_to_file(
        file_name=f"eval_result_{self.setup_config.experiment_name}"
    )
    self.logger.info(f"Time taken for evaluation: {(time.time() - start_time)} sec")
    return total_correct / total_count
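
For reference, a minimal usage sketch of this method follows. It assumes the surrounding class in instantiate.py is GluePromptOpt with the constructor and get_best_prompt() interface described in the PromptWizard README; the import path, the config and dataset paths, and the my_dataset_processor object are placeholders to adapt to your setup, and the expected JSONL key names are assumptions.

# Minimal usage sketch (not part of the file above). Constructor argument order
# and get_best_prompt() defaults follow the PromptWizard README; adjust the
# import to your local package layout.
from promptwizard.glue.promptopt.instantiate import GluePromptOpt

# Each line of test.jsonl is expected to carry the question and the gold answer
# under the keys named by DatasetSpecificProcessing.QUESTION_LITERAL /
# FINAL_ANSWER_LITERAL, e.g. (key names assumed):
# {"question": "What is 2 + 3?", "final_answer": "5"}

gp = GluePromptOpt(
    "configs/promptopt_config.yaml",  # placeholder path to the prompt-optimization config
    "configs/setup_config.yaml",      # placeholder path to the setup config
    "data/train.jsonl",               # placeholder path to the training dataset
    my_dataset_processor,             # placeholder DatasetSpecificProcessing subclass instance
)

# get_best_prompt() sets gp.BEST_PROMPT, which evaluate() requires; depending on
# the version, it may take keyword arguments controlling example usage.
best_prompt, expert_profile = gp.get_best_prompt()

# Run the evaluation loop shown above; the return value is the fraction of
# correctly answered test examples.
accuracy = gp.evaluate("data/test.jsonl")
print(f"Test accuracy: {accuracy:.2%}")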