in src/train_ner.py
# Module-level imports assumed at the top of src/train_ner.py
import numpy as np
import evaluate

def compute_metrics(self, p):
    logits, labels = p
    # Token-classification logits have shape (batch, seq_len, num_labels),
    # so take the argmax over the last axis to get per-token label ids.
    predictions = np.argmax(logits, axis=-1)
    # Post-process the predictions and labels to drop positions labeled -100
    # (special tokens / subword continuations ignored by the loss)
    true_predictions, true_labels = self.postprocess_predictions_and_labels(predictions, labels)
    # Load the individual metrics
    accuracy_metric = evaluate.load("accuracy")
    precision_metric = evaluate.load("precision")
    recall_metric = evaluate.load("recall")
    f1_metric = evaluate.load("f1")
    # Compute each metric on the cleaned token-level predictions and labels
    accuracy = accuracy_metric.compute(predictions=true_predictions, references=true_labels)
    precision = precision_metric.compute(predictions=true_predictions, references=true_labels, average="weighted")
    recall = recall_metric.compute(predictions=true_predictions, references=true_labels, average="weighted")
    f1 = f1_metric.compute(predictions=true_predictions, references=true_labels, average="weighted")
    return {
        "accuracy": accuracy["accuracy"],
        "precision": precision["precision"],
        "recall": recall["recall"],
        "f1": f1["f1"],
    }
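
The helper `postprocess_predictions_and_labels` referenced above is not shown in this excerpt. Below is a minimal sketch of what it could look like, assuming the Hugging Face convention of `-100` marking positions ignored by the loss and assuming the metrics expect flat lists of integer label ids; the function body and its flattening behavior are assumptions, not the repository's confirmed implementation.

def postprocess_predictions_and_labels(self, predictions, labels):
    """Hypothetical sketch: drop -100 positions and flatten to 1-D label-id lists.

    The real helper in src/train_ner.py may differ (for example, it could map ids
    back to label strings per sentence for seqeval-style evaluation instead).
    """
    true_predictions = [
        int(pred)
        for pred_row, label_row in zip(predictions, labels)
        for pred, label in zip(pred_row, label_row)
        if label != -100
    ]
    true_labels = [
        int(label)
        for pred_row, label_row in zip(predictions, labels)
        for pred, label in zip(pred_row, label_row)
        if label != -100
    ]
    return true_predictions, true_labels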