in T5DST/T5.py [0:0]
def evaluate_model(args, tokenizer, model, test_loader, save_path, ALL_SLOTS, prefix="zeroshot"):
    save_path = os.path.join(save_path, "results")
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    predictions = {}
    # move model to GPU and switch to eval mode
    # gpu = args["GPU"][0]
    device = torch.device("cuda:0")
    model.to(device)
    model.eval()

    # per-slot counters: [total, hits, accuracy]
    slot_logger = {slot_name: [0, 0, 0] for slot_name in ALL_SLOTS}

    for batch in tqdm(test_loader):
        # generate one slot value per (dialogue turn, slot) example in the batch
        dst_outputs = model.generate(input_ids=batch["encoder_input"].to(device),
                                     attention_mask=batch["attention_mask"].to(device),
                                     eos_token_id=tokenizer.eos_token_id,
                                     max_length=200,
                                     )
        value_batch = tokenizer.batch_decode(dst_outputs, skip_special_tokens=True)

        for idx, value in enumerate(value_batch):
            dial_id = batch["ID"][idx]
            if dial_id not in predictions:
                predictions[dial_id] = {}
                predictions[dial_id]["domain"] = batch["domains"][idx][0]
                predictions[dial_id]["turns"] = {}
            if batch["turn_id"][idx] not in predictions[dial_id]["turns"]:
                predictions[dial_id]["turns"][batch["turn_id"][idx]] = {"turn_belief": batch["turn_belief"][idx], "pred_belief": []}

            # "none" means the slot is not mentioned in this turn, so it is left out of the predicted belief state
            if value != "none":
                predictions[dial_id]["turns"][batch["turn_id"][idx]]["pred_belief"].append(str(batch["slot_text"][idx]) + '-' + str(value))

            # per-slot accuracy: count a hit when the generated value matches the gold value
            if str(value) == str(batch["value_text"][idx]):
                slot_logger[str(batch["slot_text"][idx])][1] += 1  # hit
            slot_logger[str(batch["slot_text"][idx])][0] += 1  # total

    # accuracy = hits / total for each slot
    for slot_log in slot_logger.values():
        slot_log[2] = slot_log[1] / slot_log[0]

    with open(os.path.join(save_path, f"{prefix}_slot_acc.json"), 'w') as f:
        json.dump(slot_logger, f, indent=4)

    with open(os.path.join(save_path, f"{prefix}_prediction.json"), 'w') as f:
        json.dump(predictions, f, indent=4)

    # corpus-level metrics: joint goal accuracy, slot F1, and turn-level accuracy
    joint_acc_score, F1_score, turn_acc_score = evaluate_metrics(predictions, ALL_SLOTS)
    evaluation_metrics = {"Joint Acc": joint_acc_score, "Turn Acc": turn_acc_score, "Joint F1": F1_score}
    print(f"{prefix} result:", evaluation_metrics)

    with open(os.path.join(save_path, f"{prefix}_result.json"), 'w') as f:
        json.dump(evaluation_metrics, f, indent=4)

    return predictions
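
A minimal usage sketch, assuming the module-level imports of T5.py (os, json, torch, tqdm) and its evaluate_metrics helper are in scope, and that a CUDA device is available since the function hard-codes "cuda:0". The real test_loader is built from the dataset pipeline elsewhere in the repo; the toy batch below is hypothetical and only illustrates the keys the function reads ("encoder_input", "attention_mask", "ID", "domains", "turn_id", "turn_belief", "slot_text", "value_text").

import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration

tokenizer = T5Tokenizer.from_pretrained("t5-small")
model = T5ForConditionalGeneration.from_pretrained("t5-small")

ALL_SLOTS = ["hotel-area", "hotel-pricerange"]          # illustrative slot names
enc = tokenizer(["dialogue history ... hotel-area?",     # one input per (turn, slot) pair
                 "dialogue history ... hotel-pricerange?"],
                padding=True, return_tensors="pt")

toy_batch = {
    "encoder_input": enc["input_ids"],
    "attention_mask": enc["attention_mask"],
    "ID": ["MUL0001.json", "MUL0001.json"],               # dialogue ids
    "domains": [["hotel"], ["hotel"]],
    "turn_id": [0, 0],
    "turn_belief": [["hotel-area-centre"], ["hotel-area-centre"]],
    "slot_text": ["hotel-area", "hotel-pricerange"],
    "value_text": ["centre", "none"],
}

# evaluate_model only iterates over test_loader, so a plain list of batch dicts suffices here
predictions = evaluate_model(args={}, tokenizer=tokenizer, model=model,
                             test_loader=[toy_batch], save_path="save/t5",
                             ALL_SLOTS=ALL_SLOTS, prefix="zeroshot")

This writes {prefix}_slot_acc.json, {prefix}_prediction.json, and {prefix}_result.json under save/t5/results and returns the per-dialogue predictions dictionary.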