in datasets.py [0:0]
def evaluate_dataset(self, data_args: DataTrainingArguments, model, device, batch_size: int, macro: bool = False) \
        -> Dict[str, float]:
    """
    Evaluate model on this dataset.

    Each (example, output sentence) pair yielded by ``self.generate_output_sentences`` is scored
    with ``self.evaluate_example``; the per-example counters are summed and converted into
    aggregate metrics.

    Args:
        data_args: forwarded unchanged to ``self.generate_output_sentences``.
        model: forwarded to ``self.generate_output_sentences`` and ``self.evaluate_example``.
        device: forwarded unchanged to ``self.generate_output_sentences``.
        batch_size: forwarded unchanged to ``self.generate_output_sentences``.
        macro: if True, also report precision/recall/F1 averaged over the values of
            ``self.entity_types`` (one score per type, looked up by ``entity_type.natural``).

    Returns:
        A dict containing:
        - ``wrong_reconstruction``, ``label_error``, ``format_error``: the corresponding counters
          divided by the ``num_sentences`` counter (0.0 when no sentence was counted);
        - ``intent_*``, ``entity_*`` and ``entity_*_no_type`` precision/recall/F1, as returned by
          ``get_precision_recall_f1`` on the summed counters;
        - ``entity_macro_*`` (only when ``macro`` is True): mean of the per-type scores
          (0.0 when there are no entity types).
    """
    results = Counter()
    for example, output_sentence in self.generate_output_sentences(data_args, model, device, batch_size):
        results += self.evaluate_example(
            example=example,
            output_sentence=output_sentence,
            model=model,
            tokenizer=self.tokenizer,
        )

    entity_precision, entity_recall, entity_f1 = get_precision_recall_f1(
        num_correct=results['correct_entities'],
        num_predicted=results['predicted_entities'],
        num_gt=results['gt_entities'],
    )

    entity_precision_no_type, entity_recall_no_type, entity_f1_no_type = get_precision_recall_f1(
        num_correct=results['correct_entities_no_type'],
        num_predicted=results['predicted_entities_no_type'],
        num_gt=results['gt_entities_no_type'],
    )

    # One (precision, recall, f1) triple per entity type; stays empty unless macro scores are requested.
    scores_by_type = []
    if macro:
        # compute also entity macro scores
        for entity_type in self.entity_types.values():
            # Per-type counts are stored under tuple keys: (counter name, natural type name).
            scores_by_type.append(get_precision_recall_f1(
                num_correct=results['correct_entities', entity_type.natural],
                num_predicted=results['predicted_entities', entity_type.natural],
                num_gt=results['gt_entities', entity_type.natural],
            ))

    intent_precision, intent_recall, intent_f1 = get_precision_recall_f1(
        num_correct=results['correct_intent'],
        num_predicted=results['predicted_intent'],
        num_gt=results['gt_intent'],
    )

    # A Counter returns 0 for keys it has never seen, so an evaluation that produced no
    # examples has num_sentences == 0; report 0.0 instead of raising ZeroDivisionError.
    num_sentences = results['num_sentences']

    def per_sentence(key):
        return results[key] / num_sentences if num_sentences else 0.0

    res = {
        # NOTE: the output key is singular while the counter key is plural; both are kept as-is.
        'wrong_reconstruction': per_sentence('wrong_reconstructions'),
        'label_error': per_sentence('label_error'),
        'format_error': per_sentence('format_error'),
        'intent_precision': intent_precision,
        'intent_recall': intent_recall,
        'intent_f1': intent_f1,
        'entity_precision': entity_precision,
        'entity_recall': entity_recall,
        'entity_f1': entity_f1,
        'entity_precision_no_type': entity_precision_no_type,
        'entity_recall_no_type': entity_recall_no_type,
        'entity_f1_no_type': entity_f1_no_type,
    }

    if macro:
        def macro_mean(position):
            # np.mean of an empty array is nan (with a RuntimeWarning); report 0.0 instead.
            if not scores_by_type:
                return 0.0
            return np.mean(np.array([scores[position] for scores in scores_by_type]))

        res.update({
            'entity_macro_precision': macro_mean(0),
            'entity_macro_recall': macro_mean(1),
            'entity_macro_f1': macro_mean(2),
        })

    return res