in src/setfit/model_card.py [0:0]
# Module-level imports assumed elsewhere in this file: Path (pathlib),
# Tuple (typing), Dataset (datasets), and list_datasets (huggingface_hub).
def infer_dataset_id(self, dataset: Dataset) -> None:
    """Try to infer the Hugging Face Hub dataset ID from this dataset's cache files."""

    def subtuple_finder(full_tuple: Tuple[str, ...], subtuple: Tuple[str, ...]) -> int:
        # Return the index at which `subtuple` first occurs in `full_tuple`, or -1 if absent
        for i, element in enumerate(full_tuple):
            if element == subtuple[0] and full_tuple[i : i + len(subtuple)] == subtuple:
                return i
        return -1
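
    # For example, subtuple_finder(("home", "me", ".cache", "huggingface", "datasets"),
    #                              ("huggingface", "datasets")) returns 3.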

    def normalize(dataset_id: str) -> str:
        # Strip path and word separators so that slightly different spellings
        # of the same dataset ID compare equal
        for token in "/\\_-":
            dataset_id = dataset_id.replace(token, "")
        return dataset_id.lower()
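
    # For example, normalize("SetFit/sst-2") == normalize("setfit___sst2") == "setfitsst2".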

    cache_files = dataset.cache_files
    if cache_files and "filename" in cache_files[0]:
        cache_path_parts = Path(cache_files[0]["filename"]).parts
        # Check whether the cache file is stored under "huggingface/datasets"
        subtuple = ("huggingface", "datasets")
        index = subtuple_finder(cache_path_parts, subtuple)
        if index == -1:
            return

        # Get the folder directly after "huggingface/datasets"
        cache_dataset_name = cache_path_parts[index + len(subtuple)]
        # If the dataset has an author, the folder is named "author___dataset_name"
        if "___" in cache_dataset_name:
            author, dataset_name = cache_dataset_name.split("___")
        else:
            author = None
            dataset_name = cache_dataset_name
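
        # For example, a cache file under ".../huggingface/datasets/stanfordnlp___imdb/..."
        # would yield author="stanfordnlp" and dataset_name="imdb" (illustrative folder
        # name; the exact cache layout can vary between `datasets` versions).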

        # Search the Hub and keep only candidates whose normalized ID matches
        dataset_list = [
            dataset_info
            for dataset_info in list_datasets(author=author, dataset_name=dataset_name)
            if normalize(dataset_info.id) == normalize(cache_dataset_name)
        ]
        # Only adopt the ID if there is exactly one unambiguous match
        if len(dataset_list) == 1:
            self.dataset_id = dataset_list[0].id
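
To make the path parsing concrete, here is a minimal, self-contained sketch of the
same steps on a hypothetical cache file. The path and the "stanfordnlp___imdb"
folder name are illustrative assumptions, not taken from the function above, and
real cache layouts can differ between `datasets` versions.

from pathlib import Path

# Hypothetical cache file produced by load_dataset(...)
cache_file = "/home/me/.cache/huggingface/datasets/stanfordnlp___imdb/plain_text/0.0.0/abc123/imdb-train.arrow"
parts = Path(cache_file).parts

# Locate the ("huggingface", "datasets") marker in the path
marker = ("huggingface", "datasets")
index = next((i for i in range(len(parts)) if parts[i : i + len(marker)] == marker), -1)

if index != -1:
    # The folder right after the marker encodes the dataset (and optional author)
    cache_dataset_name = parts[index + len(marker)]  # "stanfordnlp___imdb"
    if "___" in cache_dataset_name:
        author, dataset_name = cache_dataset_name.split("___")
    else:
        author, dataset_name = None, cache_dataset_name
    print(author, dataset_name)  # stanfordnlp imdb

The real method then queries the Hub via list_datasets(author=author,
dataset_name=dataset_name) and records the dataset ID only when exactly one
normalized candidate matches, so an ambiguous cache folder never produces a
wrong model card entry.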