def fetch_dataset()

in src/hyperpod_nemo_adapter/collections/data/datasets/hf_dataset.py [0:0]

19 lines of code
1 McCabe index (conditional complexity)


    def fetch_dataset(self, path):
        """Load the dataset found at ``path`` using the loader for ``self.data_format``.

        Args:
            path: Location of the data. For the Arrow format it is handed
                straight to ``load_from_disk``; for the JSON formats it is
                joined with a ``*<suffix>`` glob to build ``data_files``.

        Returns:
            The object produced by ``load_from_disk`` (Arrow) or by
            ``load_dataset`` (JSON / gzipped JSON).

        Raises:
            NotImplementedError: If ``self.data_format`` matches none of the
                handled ``DataTypes`` constants.
        """
        # NOTE(review): load_from_disk / load_dataset are presumably the
        # Hugging Face `datasets` helpers -- confirm against the file imports.
        requested_format = self.data_format

        # Arrow data goes through its own loader, so handle it up front.
        if requested_format == DataTypes.ARROW:
            return load_from_disk(path)

        # Both JSON flavours share one loader call; only the file suffix
        # used in the glob differs.
        if requested_format == DataTypes.JSONGZ:
            suffix = DataTypes.JSONGZ
        elif requested_format == DataTypes.JSON:
            suffix = DataTypes.JSON
        else:
            raise NotImplementedError(f"{self.data_format} is not supported.")

        return load_dataset(
            self.input_path,
            data_files=[os.path.join(path, f"*{suffix}")],
            split=self.partition,
        )