def _get_data_format()

in src/hyperpod_nemo_adapter/collections/data/datasets/hf_dataset.py [0:0]


    def _get_data_format(self, path):
        """Detect which supported file format the dataset directory (or directories) holds.

        Every regular file directly inside each directory is inspected (no
        recursion). Formats are tried in priority order ARROW, JSONGZ, JSON,
        and the first one carried by at least one file wins.

        A file matches a format when any trailing run of its dot-separated
        suffixes equals that format, so sharded names such as
        ``c4-train.00000-of-01024.json.gz`` are recognised even though their
        full suffix chain is ``.00000-of-01024.json.gz``.

        Args:
            path: A single directory (``str`` or ``Path``) or an iterable of
                directories.

        Returns:
            One of ``DataTypes.ARROW``, ``DataTypes.JSONGZ`` or
            ``DataTypes.JSON``.

        Raises:
            NotImplementedError: If no file carries a supported extension.
        """
        # A lone str/Path is one directory; anything else is treated as an
        # iterable of directories.
        if isinstance(path, (str, Path)):
            path = [path]

        full_suffixes = set()  # whole suffix chain per file, reported on failure
        candidate_suffixes = set()  # every trailing run of each chain, used for matching
        for directory in path:
            for entry in Path(directory).iterdir():
                if not entry.is_file():
                    continue
                parts = entry.suffixes
                full_suffixes.add("".join(parts))
                # For ['.x', '.json', '.gz'] this adds '.x.json.gz', '.json.gz'
                # and '.gz', so extra dots in the stem do not hide the real
                # extension while '.json' alone still does not match a
                # '.json.gz' file.
                candidate_suffixes.update("".join(parts[i:]) for i in range(len(parts)))

        # Order matters: it is the priority used when a directory mixes formats.
        for data_type in (DataTypes.ARROW, DataTypes.JSONGZ, DataTypes.JSON):
            if any(suffix == data_type for suffix in candidate_suffixes):
                return data_type

        # Sorted so the message is deterministic across runs.
        suffixes_list = sorted(full_suffixes)
        raise NotImplementedError(
            f"Unsupported file format in dataset directory. Expecting files of type '.arrow' '.json.gz' or '.json' but instead found {','.join(suffixes_list)}."
        )