def postprocess_predictions_and_labels()

in src/train_ner.py

Filters each (prediction, reference) pair to drop positions labeled -100 (the conventional ignore index for special tokens and subword continuations), aligns predictions with the surviving labels, skips examples whose lengths disagree, and returns flattened prediction and label lists ready for metric computation.

    def postprocess_predictions_and_labels(self, predictions, references):
        """Drop -100 positions, align predictions with labels, and flatten."""
        true_predictions = []
        true_labels = []
        cmp_count = 0  # number of examples kept (lengths matched after filtering)

        for prediction, reference in zip(predictions, references):
            # Keep only labels that are not -100 (ignored positions)
            true_labels_example = [label for label in reference if label != -100]

            # Align predictions: drop positions whose reference label is -100
            true_predictions_example = [pred for pred, ref in zip(prediction, reference) if ref != -100]

            # Keep the example only if prediction and label lengths agree
            if len(true_predictions_example) == len(true_labels_example):
                true_labels.append(true_labels_example)
                true_predictions.append(true_predictions_example)
                cmp_count += 1
            else:
                # Example-level mismatch: log it and skip this example
                logger.warning(
                    f"Skipping example due to mismatch: "
                    f"predictions ({len(true_predictions_example)}), "
                    f"labels ({len(true_labels_example)})"
                )

        # Flatten from a list of lists to a single token-level list
        true_predictions = [pred for sublist in true_predictions for pred in sublist]
        true_labels = [label for sublist in true_labels for label in sublist]
        logger.info(f"Kept {cmp_count} of {len(predictions)} examples after the length check")

        return true_predictions, true_labels
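
The same filtering can be sketched standalone on toy inputs; the values below are illustrative assumptions, not project data, and the length-mismatch guard is omitted because aligned toy inputs cannot trigger it. In a typical token-classification loop, predictions would come from an argmax over per-token logits and references from the batch's label ids, with -100 marking special tokens and subword continuations.

    predictions = [[1, 2, 0, 0], [3, 0, 1, 2]]
    references = [[1, 2, -100, 0], [-100, 0, 1, 2]]

    flat_preds, flat_labels = [], []
    for pred, ref in zip(predictions, references):
        # Same alignment rule as the method above: keep positions whose label is not -100
        kept = [i for i, r in enumerate(ref) if r != -100]
        flat_preds.extend(pred[i] for i in kept)
        flat_labels.extend(ref[i] for i in kept)

    print(flat_preds)   # [1, 2, 0, 0, 1, 2]
    print(flat_labels)  # [1, 2, 0, 0, 1, 2]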