def set_train_set

def set_train_set_metrics()

in src/setfit/model_card.py [0:0]
42 lines of code
13 McCabe index (conditional complexity)

    def set_train_set_metrics(self, dataset: Dataset) -> None:
        """Compute training-set statistics for the model card.

        Stores a one-row word-count summary (min / median / max) in
        ``self.train_set_metrics_list`` and, when the dataset has a scalar or
        string ``"label"`` column, a per-label sample count in
        ``self.train_set_sentences_per_label_list``.

        Word counts are "naive": each ``"text"`` value is split on single
        spaces, so consecutive spaces yield empty "words" that are counted too.

        Args:
            dataset: Training dataset with a ``"text"`` column and, optionally,
                a ``"label"`` column.
        """
        # Function-scope import so this block does not depend on the file's import header.
        from statistics import median

        # Nothing to report for an empty dataset; min()/max() on an empty column would raise.
        if len(dataset) == 0:
            return

        def add_naive_word_count(sample: Dict[str, Any]) -> Dict[str, Any]:
            sample["word_count"] = len(sample["text"].split(" "))
            return sample

        dataset = dataset.map(add_naive_word_count)
        # Read the column once instead of once per statistic.
        word_counts = list(dataset["word_count"])
        self.train_set_metrics_list = [
            {
                "Training set": "Word count",
                "Min": min(word_counts),
                # A true median (this used to be the arithmetic mean despite the key name).
                # float() keeps the value type stable for odd- and even-sized datasets.
                "Median": float(median(word_counts)),
                "Max": max(word_counts),
            },
        ]
        # E.g. if unlabeled via DistillationTrainer
        if "label" not in dataset.column_names:
            return

        sample_label = dataset[0]["label"]
        labels_are_strings = isinstance(sample_label, str)
        # Non-string sequence labels (presumably multi-label targets) get no per-label counts.
        if isinstance(sample_label, collections.abc.Sequence) and not labels_are_strings:
            return

        try:
            counter = Counter(dataset["label"])
            if self.model.labels:
                # Report in the model's label order; integer dataset labels are looked up
                # through label2id. Counter yields 0 for labels absent from the dataset.
                self.train_set_sentences_per_label_list = [
                    {
                        "Label": str_label,
                        "Training Sample Count": counter[
                            str_label if labels_are_strings else self.model.label2id[str_label]
                        ],
                    }
                    for str_label in self.model.labels
                ]
            else:
                # The model has no label names, so report the raw dataset labels, sorted.
                self.train_set_sentences_per_label_list = [
                    {
                        "Label": str(label),
                        "Training Sample Count": count,
                    }
                    for label, count in sorted(counter.items())
                ]
        except Exception:
            # There are some tricky edge cases possible, e.g. if the user provided integer labels that do not fall
            # between 0 to num_classes-1, so we make sure we never cause errors. On failure the per-label list is
            # simply not assigned by this call.
            pass