in model/utils/response_evaluation.py [0:0]
import json


def main(args):
    # Load the ground-truth dialog responses.
    print("Reading: {}".format(args["data_json_path"]))
    with open(args["data_json_path"], "r") as file_id:
        gt_responses = json.load(file_id)

    # Load the model-generated responses to be scored.
    print("Reading: {}".format(args["model_response_path"]))
    with open(args["model_response_path"], "r") as file_id:
        model_responses = json.load(file_id)

    # Optionally write per-instance results alongside the model response file.
    if args["record_instance_results"]:
        instance_results_path = args["model_response_path"].replace(
            ".json", "_results.json"
        )
    else:
        instance_results_path = None

    # Compute corpus BLEU and its standard error.
    bleu_score, bleu_std_err = evaluate_response_generation(
        gt_responses,
        model_responses,
        args["single_round_evaluation"],
        instance_results_path,
    )
    print(f"BLEU Score: {bleu_score:.4f} +- {bleu_std_err}")