src/datasets/packaged_modules/folder_based_builder/folder_based_builder.py [314:333]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                    while True:
                        try:
                            pa_table = paj.read_json(
                                io.BytesIO(batch), read_options=paj.ReadOptions(block_size=block_size)
                            )
                            break
                        except (pa.ArrowInvalid, pa.ArrowNotImplementedError) as e:
                            if (
                                isinstance(e, pa.ArrowInvalid)
                                and "straddling" not in str(e)
                                or block_size > len(batch)
                            ):
                                raise
                            else:
                                # Increase the block size in case it was too small.
                                # The block size will be reset for the next file.
                                logger.debug(
                                    f"Batch of {len(batch)} bytes couldn't be parsed with block_size={block_size}. Retrying with block_size={block_size * 2}."
                                )
                                block_size *= 2
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



src/datasets/packaged_modules/json/json.py [135:154]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                            while True:
                                try:
                                    pa_table = paj.read_json(
                                        io.BytesIO(batch), read_options=paj.ReadOptions(block_size=block_size)
                                    )
                                    break
                                except (pa.ArrowInvalid, pa.ArrowNotImplementedError) as e:
                                    if (
                                        isinstance(e, pa.ArrowInvalid)
                                        and "straddling" not in str(e)
                                        or block_size > len(batch)
                                    ):
                                        raise
                                    else:
                                        # Increase the block size in case it was too small.
                                        # The block size will be reset for the next file.
                                        logger.debug(
                                            f"Batch of {len(batch)} bytes couldn't be parsed with block_size={block_size}. Retrying with block_size={block_size * 2}."
                                        )
                                        block_size *= 2
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



