def process_vision_info()

in scripts/ft_gemma3n_image_trl.py [0:0]


def process_vision_info(messages: list[dict]) -> list[Image.Image]:
    image_inputs = []
    for msg in messages:
        content = msg.get("content", [])
        if not isinstance(content, list):
            content = [content]

        for element in content:
            if isinstance(element, dict) and (
                "image" in element or element.get("type") == "image"
            ):
                if "image" in element:
                    image = element["image"]
                else:
                    image = element
                if image is not None:
                    # Handle dictionary with bytes
                    if isinstance(image, dict) and "bytes" in image:
                        pil_image = Image.open(io.BytesIO(image["bytes"]))
                        image_inputs.append(pil_image.convert("RGB"))
                    # Handle PIL Image objects
                    elif hasattr(image, "convert"):
                        image_inputs.append(image.convert("RGB"))
    return image_inputs