in eval/lmms_eval_wrapper.py
from typing import List, Optional, Union

import numpy as np
import torch
from PIL import Image


def _prepare_visual_input(
    self, visual_list: List[Union[Image.Image, str, np.ndarray]]
) -> Optional[torch.Tensor]:
    """Convert visual inputs into a batched tensor on the model device."""
    if not visual_list or visual_list[0] is None:
        # No visual input for this request (empty list or None placeholder).
        return None

    images = []
    for visual in visual_list:
        if isinstance(visual, Image.Image):
            image = visual
        elif isinstance(visual, str):
            # Accept file paths for convenience.
            image = Image.open(visual).convert("RGB")
        elif isinstance(visual, np.ndarray):
            # Accept raw numpy arrays for convenience.
            image = Image.fromarray(visual)
        else:
            raise ValueError(
                f"Unsupported visual type: {type(visual)}. "
                "Expected PIL Image, path string, or numpy array."
            )
        # Apply the model's preprocessing transform (resize/normalize/tensorize).
        processed = self.image_processor(image)
        images.append(processed)

    # Every element either produced a tensor or raised, so images is non-empty.
    return torch.stack(images).to(self.device)
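
For reference, a minimal usage sketch follows. The `EvalWrapper` stub, its torchvision-based `image_processor`, and the `device` attribute are illustrative assumptions, not the real wrapper's definitions; the actual class in eval/lmms_eval_wrapper.py supplies its own processor and device.

import numpy as np
import torch
from PIL import Image
from torchvision import transforms


class EvalWrapper:
    """Hypothetical stand-in: defines only the two attributes
    _prepare_visual_input relies on (image_processor, device)."""

    def __init__(self):
        self.image_processor = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
        ])
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


# Attach the method above to the stub class for demonstration.
EvalWrapper._prepare_visual_input = _prepare_visual_input

wrapper = EvalWrapper()
visuals = [
    Image.new("RGB", (64, 64)),             # PIL image passes through
    np.zeros((64, 64, 3), dtype=np.uint8),  # numpy array is wrapped first
]
batch = wrapper._prepare_visual_input(visuals)
print(batch.shape)  # torch.Size([2, 3, 224, 224])

Mixed input types stack cleanly because every element is routed through the same preprocessing transform before torch.stack, which requires uniform per-image tensor shapes.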