in datasets.py
from collections import defaultdict
from typing import Dict


def evaluate_dataset(self, data_args, model, device, batch_size=8, macro=False, by_relation_type=False) \
        -> Dict[str, float]:
    """
    Evaluate the model on this dataset and return micro-averaged coreference metrics.

    The `macro` and `by_relation_type` arguments are accepted for interface
    compatibility but are not used in this implementation.
    """
    # Per-chunk predictions collected for each document.
    documents_to_chunk_data = defaultdict(list)
    # Final document-level predictions, keyed by document id.
    predictions = {}
    for example, output_sentence in self.generate_output_sentences(data_args, model, device, batch_size):
        document_id = example.document_id
        # Parse the generated output sentence into structured predictions.
        data = self.output_format.run_inference(
            example=example,
            output_sentence=output_sentence,
        )
        # Add the chunk offset to all span indices so they refer to positions
        # in the full document; entries whose second span is None are dropped.
        offset = example.offset
        data = [tuple(tuple(y + offset for y in x) for x in z) for z in data if z[1] is not None]
        documents_to_chunk_data[document_id].append(data)
        if len(documents_to_chunk_data[document_id]) == len(self.documents[document_id].chunks):
            # All chunks of this document have been processed: merge the
            # chunk-level predictions into document-level predictions.
            predictions[document_id] = self.get_document_predictions(documents_to_chunk_data[document_id])
    # Build parallel lists of predicted and gold clusters, where each gold
    # cluster is a list of (start, end) mention spans. This assumes every
    # document was fully covered above; otherwise the lookup raises KeyError.
    predictions_list = []
    labels_list = []
    for document_id, document in self.documents.items():
        predictions_list.append(predictions[document_id])
        labels_list.append([
            [(entity.start, entity.end) for entity in group]
            for group in document.groups
        ])
    metrics = CorefAllMetrics().get_all_metrics(labels_list, predictions_list)
    # Flatten the nested micro-averaged metrics into '{metric_name}_{field}' keys.
    return {
        f'{metric_name}_{x}': v
        for metric_name, metric_values in metrics['micro'].items()
        for x, v in metric_values.items()
    }
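
# For concreteness, a toy illustration of the flattening performed by the
# return statement above. The nested structure and metric names here are
# assumptions for illustration, not the actual output of CorefAllMetrics.
example_metrics = {'micro': {'muc': {'precision': 0.81, 'recall': 0.76, 'f1': 0.78}}}
flat = {
    f'{metric_name}_{x}': v
    for metric_name, metric_values in example_metrics['micro'].items()
    for x, v in metric_values.items()
}
assert flat == {'muc_precision': 0.81, 'muc_recall': 0.76, 'muc_f1': 0.78}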
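
# A minimal usage sketch, assuming this method belongs to a coreference
# dataset class in this repository. `build_coref_dataset`, `data_args`, and
# the checkpoint path are hypothetical placeholders, not the repository's API.
import torch
from transformers import AutoModelForSeq2SeqLM

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = AutoModelForSeq2SeqLM.from_pretrained('path/to/checkpoint').to(device)

dataset = build_coref_dataset(data_args)  # hypothetical constructor
results = dataset.evaluate_dataset(data_args, model, device, batch_size=8)
for name, value in sorted(results.items()):
    print(f'{name}: {value:.4f}')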