in scripts/ft_gemma3n_image_trl.py [0:0]
def process_vision_info(messages: list[dict]) -> list[Image.Image]:
    """Gather the images referenced by a list of chat messages, converted to RGB.

    Each message's ``"content"`` value is scanned; a value that is not a list
    is treated as a one-item list. Only dict parts that either carry an
    ``"image"`` key or have ``"type" == "image"`` are considered. The
    candidate is the value under ``"image"`` when that key is present,
    otherwise the part dict itself.

    A candidate is turned into an output entry in one of two ways:

    * a dict with a ``"bytes"`` key is opened via ``Image.open`` on an
      in-memory buffer and then converted with ``.convert("RGB")``;
    * any other object exposing a ``convert`` attribute has
      ``.convert("RGB")`` called on it directly.

    Candidates that are ``None`` or match neither form are skipped silently.

    Args:
        messages: Chat messages; each must support ``.get("content", [])``.

    Returns:
        The converted images, in the order they were encountered.
    """
    collected = []
    for message in messages:
        parts = message.get("content", [])
        if not isinstance(parts, list):
            parts = [parts]

        for part in parts:
            # Plain strings and other non-dict parts never carry an image.
            if not isinstance(part, dict):
                continue
            if "image" not in part and part.get("type") != "image":
                continue

            # Without an explicit "image" payload, the part itself is inspected.
            candidate = part["image"] if "image" in part else part
            if candidate is None:
                continue

            if isinstance(candidate, dict) and "bytes" in candidate:
                # Raw encoded bytes: decode from memory before converting.
                decoded = Image.open(io.BytesIO(candidate["bytes"]))
                collected.append(decoded.convert("RGB"))
            elif hasattr(candidate, "convert"):
                # Duck-typed image object: anything with a ``convert`` attribute.
                collected.append(candidate.convert("RGB"))
    return collected