def _infer_custom_type_and_encode()

in src/datasets/arrow_writer.py [0:0]


    def _infer_custom_type_and_encode(data: Iterable) -> tuple[Iterable, Optional[FeatureType]]:
        """Implement type inference for custom objects like PIL.Image.Image -> Image type.

        This function is only used for custom python objects that can't be directly passed to build
        an Arrow array. In such cases it infers the feature type to use, and it encodes the data so
        that they can be passed to an Arrow array.

        Detection looks at a single sample: the value returned by
        ``first_non_null_non_empty_value(data)``, or its first element when that value is a list.
        A library is only considered when its ``config.*_AVAILABLE`` flag is set and it is already
        present in ``sys.modules``. PIL is checked before pdfplumber.

        Args:
            data (Iterable): array of data to infer the type, e.g. a list of PIL images.
                It is passed to ``first_non_null_non_empty_value`` and then iterated again to
                encode it, so a one-shot iterator is presumably not suitable here.

        Returns:
            Tuple[Iterable, Optional[FeatureType]]: a tuple with:
                - the (possibly encoded) array, if the inferred feature type requires encoding.
                  ``None`` entries of ``data`` are kept as ``None``.
                - the inferred feature type if the array is made of supported custom objects like
                    PIL images or pdfplumber PDFs (wrapped in a one-element list when the values
                    are lists of such objects), else None.
        """
        # (python type, feature class) pairs to probe, in priority order.
        supported = []
        if config.PIL_AVAILABLE and "PIL" in sys.modules:
            import PIL.Image

            supported.append((PIL.Image.Image, Image))
        if config.PDFPLUMBER_AVAILABLE and "pdfplumber" in sys.modules:
            import pdfplumber

            supported.append((pdfplumber.pdf.PDF, Pdf))
        if not supported:
            # No candidate library is loaded: skip scanning `data` altogether.
            return data, None

        # Scan `data` once for the sample, whatever the number of candidate types.
        _, sample = first_non_null_non_empty_value(data)
        is_nested = isinstance(sample, list)
        probe = sample[0] if is_nested else sample

        for python_type, feature_cls in supported:
            if not isinstance(probe, python_type):
                continue
            # One feature object is enough: it is used to encode every item and is also
            # what gets returned as the inferred type.
            feature = feature_cls()
            if is_nested:
                encoded = [
                    [feature.encode_example(item) for item in value] if value is not None else None
                    for value in data
                ]
                return encoded, [feature]
            return [feature.encode_example(value) if value is not None else None for value in data], feature
        return data, None