src/datasets/builder.py [1440:1506]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        }

        if num_proc is None or num_proc == 1:
            result = None
            gen_kwargs = split_generator.gen_kwargs
            job_id = 0
            with pbar:
                for job_id, done, content in self._prepare_split_single(
                    gen_kwargs=gen_kwargs, job_id=job_id, **_prepare_split_args
                ):
                    if done:
                        result = content
                    else:
                        pbar.update(content)
            # wrapping everything into lists for consistency with the multiprocessed code path
            assert result is not None, "Failed to retrieve results from prepare_split"
            examples_per_job, bytes_per_job, features_per_job, shards_per_job, shard_lengths_per_job = (
                [item] for item in result
            )
        else:
            kwargs_per_job = [
                {"gen_kwargs": gen_kwargs, "job_id": job_id, **_prepare_split_args}
                for job_id, gen_kwargs in enumerate(
                    _split_gen_kwargs(split_generator.gen_kwargs, max_num_jobs=num_proc)
                )
            ]
            num_jobs = len(kwargs_per_job)

            examples_per_job = [None] * num_jobs
            bytes_per_job = [None] * num_jobs
            features_per_job = [None] * num_jobs
            shards_per_job = [None] * num_jobs
            shard_lengths_per_job = [None] * num_jobs

            with Pool(num_proc) as pool:
                with pbar:
                    for job_id, done, content in iflatmap_unordered(
                        pool, self._prepare_split_single, kwargs_iterable=kwargs_per_job
                    ):
                        if done:
                            # the content is the result of the job
                            (
                                examples_per_job[job_id],
                                bytes_per_job[job_id],
                                features_per_job[job_id],
                                shards_per_job[job_id],
                                shard_lengths_per_job[job_id],
                            ) = content
                        else:
                            # the content is the number of examples progress update
                            pbar.update(content)

            assert None not in examples_per_job, (
                f"Failed to retrieve results from prepare_split: result list {examples_per_job} still contains None - at least one worker failed to return its results"
            )

        total_shards = sum(shards_per_job)
        total_num_examples = sum(examples_per_job)
        total_num_bytes = sum(bytes_per_job)
        features = features_per_job[0]

        split_generator.split_info.num_examples = total_num_examples
        split_generator.split_info.num_bytes = total_num_bytes

        # should rename everything at the end
        logger.debug(f"Renaming {total_shards} shards.")
        if total_shards > 1:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



src/datasets/builder.py [1695:1761]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        }

        if num_proc is None or num_proc == 1:
            result = None
            gen_kwargs = split_generator.gen_kwargs
            job_id = 0
            with pbar:
                for job_id, done, content in self._prepare_split_single(
                    gen_kwargs=gen_kwargs, job_id=job_id, **_prepare_split_args
                ):
                    if done:
                        result = content
                    else:
                        pbar.update(content)
            # wrapping everything into lists for consistency with the multiprocessed code path
            assert result is not None, "Failed to retrieve results from prepare_split"
            examples_per_job, bytes_per_job, features_per_job, shards_per_job, shard_lengths_per_job = (
                [item] for item in result
            )
        else:
            kwargs_per_job = [
                {"gen_kwargs": gen_kwargs, "job_id": job_id, **_prepare_split_args}
                for job_id, gen_kwargs in enumerate(
                    _split_gen_kwargs(split_generator.gen_kwargs, max_num_jobs=num_proc)
                )
            ]
            num_jobs = len(kwargs_per_job)

            examples_per_job = [None] * num_jobs
            bytes_per_job = [None] * num_jobs
            features_per_job = [None] * num_jobs
            shards_per_job = [None] * num_jobs
            shard_lengths_per_job = [None] * num_jobs

            with Pool(num_proc) as pool:
                with pbar:
                    for job_id, done, content in iflatmap_unordered(
                        pool, self._prepare_split_single, kwargs_iterable=kwargs_per_job
                    ):
                        if done:
                            # the content is the result of the job
                            (
                                examples_per_job[job_id],
                                bytes_per_job[job_id],
                                features_per_job[job_id],
                                shards_per_job[job_id],
                                shard_lengths_per_job[job_id],
                            ) = content
                        else:
                            # the content is the number of examples progress update
                            pbar.update(content)

            assert None not in examples_per_job, (
                f"Failed to retrieve results from prepare_split: result list {examples_per_job} still contains None - at least one worker failed to return its results"
            )

        total_shards = sum(shards_per_job)
        total_num_examples = sum(examples_per_job)
        total_num_bytes = sum(bytes_per_job)
        features = features_per_job[0]

        split_generator.split_info.num_examples = total_num_examples
        split_generator.split_info.num_bytes = total_num_bytes

        # should rename everything at the end
        logger.debug(f"Renaming {total_shards} shards.")
        if total_shards > 1:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



