def compute_metrics()

in src/train_ner.py [0:0]


    def compute_metrics(self, p):
        """Compute accuracy and weighted precision/recall/F1 for an evaluation pass.

        Args:
            p: A two-item iterable ``(logits, labels)``. The class scores are
                assumed to live on the last axis of ``logits`` (e.g.
                ``(batch, seq_len, num_labels)`` for per-token predictions)
                -- TODO confirm against the caller.

        Returns:
            A dict with the keys ``"accuracy"``, ``"precision"``, ``"recall"``
            and ``"f1"``. Precision, recall and F1 use ``average="weighted"``.
        """
        logits, labels = p

        # Reduce over the last (class) axis. ``axis=1`` is the class axis only
        # for 2-D logits; for 3-D per-token logits it would reduce over the
        # sequence positions and produce predictions that no longer line up
        # with ``labels``. ``axis=-1`` gives the same result as before for 2-D.
        predictions = np.argmax(logits, axis=-1)

        # Post-process the predictions and labels to remove -100 values
        true_predictions, true_labels = self.postprocess_predictions_and_labels(
            predictions, labels
        )

        # Accuracy takes no averaging argument, so it is computed separately.
        results = {
            "accuracy": evaluate.load("accuracy").compute(
                predictions=true_predictions, references=true_labels
            )["accuracy"],
        }

        # The remaining metrics share one call shape: weighted averaging
        # across classes, with the result stored under the metric's own name.
        for metric_name in ("precision", "recall", "f1"):
            scores = evaluate.load(metric_name).compute(
                predictions=true_predictions,
                references=true_labels,
                average="weighted",
            )
            results[metric_name] = scores[metric_name]

        return results