in datasets.py [0:0]
def evaluate_dataset(self, data_args: DataTrainingArguments, model, device, batch_size: int, macro: bool = False) \
-> Dict[str, float]:
    """
    Run the model over every example in this dataset and aggregate
    relation-extraction metrics.

    For each (example, generated output sentence) pair produced by
    ``generate_output_sentences``, ``evaluate_example`` returns per-example
    counts which are summed into a single ``Counter``; precision/recall/F1
    are then computed from the pooled totals (micro-averaging).

    NOTE(review): the ``macro`` flag is accepted but never read in this
    body — macro-averaged metrics are not computed here. Confirm whether
    an override elsewhere uses it.

    :param data_args: data-loading/processing arguments forwarded to
        ``generate_output_sentences``.
    :param model: the model used to generate output sentences.
    :param device: device the model runs on.
    :param batch_size: generation batch size.
    :param macro: unused in this implementation (see NOTE above).
    :return: dict with typed and untyped relation precision/recall/F1 plus
        raw gt/predicted trigger and relation counts.
    """
    # Pool per-example counts; Counter returns 0 for keys never seen,
    # so missing statistics degrade gracefully to zero.
    tallies = Counter()
    for example, generated in self.generate_output_sentences(data_args, model, device, batch_size):
        tallies += self.evaluate_example(
            example=example,
            output_sentence=generated,
            tokenizer=self.tokenizer,
        )

    # Metrics over relations where the predicted type must match.
    typed_p, typed_r, typed_f1 = get_precision_recall_f1(
        num_correct=tallies['correct_relations'],
        num_predicted=tallies['predicted_relations'],
        num_gt=tallies['gt_relations'],
    )

    # Metrics ignoring the relation type (head/tail match only).
    untyped_p, untyped_r, untyped_f1 = get_precision_recall_f1(
        num_correct=tallies['correct_relations_no_type'],
        num_predicted=tallies['predicted_relations_no_type'],
        num_gt=tallies['gt_relations_no_type'],
    )

    return {
        'relation_precision': typed_p,
        'relation_recall': typed_r,
        'relation_f1': typed_f1,
        'relation_precision_no_type': untyped_p,
        'relation_recall_no_type': untyped_r,
        'relation_f1_no_type': untyped_f1,
        'num_gt_triggers': tallies['gt_entities'],
        'num_pred_triggers': tallies['predicted_entities'],
        'num_gt_relations': tallies['gt_relations'],
        'num_pred_relations': tallies['predicted_relations'],
    }