src/datasets/builder.py [1409:1437]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
            split_info = split_generator.split_info

        SUFFIX = "-JJJJJ-SSSSS-of-NNNNN"
        fname = f"{self.dataset_name}-{split_generator.name}{SUFFIX}.{file_format}"
        fpath = posixpath.join(self._output_dir, fname)

        if num_proc and num_proc > 1:
            num_input_shards = _number_of_shards_in_gen_kwargs(split_generator.gen_kwargs)
            if num_input_shards <= 1:
                logger.warning(
                    f"Setting num_proc from {num_proc} back to 1 for the {split_info.name} split to disable multiprocessing as it only contains one shard."
                )
                num_proc = 1
            elif num_input_shards < num_proc:
                logger.warning(
                    f"Setting num_proc from {num_proc} to {num_input_shards} for the {split_info.name} split as it only contains {num_input_shards} shards."
                )
                num_proc = num_input_shards

        pbar = hf_tqdm(
            unit=" examples",
            total=split_info.num_examples,
            desc=f"Generating {split_info.name} split",
        )

        _prepare_split_args = {
            "fpath": fpath,
            "file_format": file_format,
            "max_shard_size": max_shard_size,
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



src/datasets/builder.py [1666:1694]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
            split_info = split_generator.split_info

        SUFFIX = "-JJJJJ-SSSSS-of-NNNNN"
        fname = f"{self.dataset_name}-{split_generator.name}{SUFFIX}.{file_format}"
        fpath = posixpath.join(self._output_dir, fname)

        if num_proc and num_proc > 1:
            num_input_shards = _number_of_shards_in_gen_kwargs(split_generator.gen_kwargs)
            if num_input_shards <= 1:
                logger.warning(
                    f"Setting num_proc from {num_proc} back to 1 for the {split_info.name} split to disable multiprocessing as it only contains one shard."
                )
                num_proc = 1
            elif num_input_shards < num_proc:
                logger.warning(
                    f"Setting num_proc from {num_proc} to {num_input_shards} for the {split_info.name} split as it only contains {num_input_shards} shards."
                )
                num_proc = num_input_shards

        pbar = hf_tqdm(
            unit=" examples",
            total=split_info.num_examples,
            desc=f"Generating {split_info.name} split",
        )

        _prepare_split_args = {
            "fpath": fpath,
            "file_format": file_format,
            "max_shard_size": max_shard_size,
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



