def _prepare_visual_input()

in eval/lmms_eval_wrapper.py


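    # Module-level imports this method relies on (shown for context):
    #   from typing import List, Optional
    #   import numpy as np
    #   import torch
    #   from PIL import Image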
    def _prepare_visual_input(self, visual_list: List[Image.Image]) -> Optional[torch.Tensor]:
        """Convert a list of visual inputs into a single batched tensor on the model device."""
        if not visual_list or visual_list[0] is None:  # Nothing to process: empty list or missing entry
            return None
            
        images = []
        for visual in visual_list:
            image = None
            if isinstance(visual, Image.Image):
                image = visual
            elif isinstance(visual, str):  # File path: load from disk and force RGB
                image = Image.open(visual).convert("RGB")
            elif isinstance(visual, np.ndarray):  # Raw array: wrap as a PIL image
                image = Image.fromarray(visual)
            else:
                raise ValueError(
                    f"Unsupported visual type: {type(visual)}. "
                    "Expected PIL Image, path string, or numpy array."
                )
            
            # Apply the model's image preprocessing
            processed = self.image_processor(image)
            images.append(processed)
        
        # The empty-list case returned early and unsupported types raise, so
        # images is guaranteed non-empty here; torch.stack assumes each
        # processed image is a tensor of identical shape.
        return torch.stack(images).to(self.device)
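
A minimal usage sketch, assuming torchvision is installed and that the method above is available at module scope for the demo. `_StubWrapper`, its `Resize`/`ToTensor` processor, and the CPU device are illustrative stand-ins, not part of eval/lmms_eval_wrapper.py:

    import numpy as np
    import torch
    from PIL import Image
    from torchvision import transforms

    class _StubWrapper:
        # Hypothetical stand-in supplying the two attributes the method reads.
        image_processor = transforms.Compose(
            [transforms.Resize((224, 224)), transforms.ToTensor()]
        )
        device = torch.device("cpu")

    # Attach the method above to the stub class for the demo.
    _StubWrapper._prepare_visual_input = _prepare_visual_input

    visuals = [
        Image.new("RGB", (64, 64)),              # PIL image
        np.zeros((64, 64, 3), dtype=np.uint8),   # numpy array
    ]
    batch = _StubWrapper()._prepare_visual_input(visuals)
    print(batch.shape)  # torch.Size([2, 3, 224, 224])

The fixed-size `Resize` in the stub is what makes `torch.stack` succeed; with a processor that preserves varying input sizes, the stack call would fail on mismatched shapes.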