core/video.py (54 lines of code) (raw):

# Copyright 2025 DeepMind Technologies Limited. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Video processors."""

import asyncio
import enum
from typing import AsyncIterable

import cv2
from genai_processors import content_api
from genai_processors import processor
import PIL.Image

ProcessorPart = content_api.ProcessorPart

# Pause between two consecutive frames emitted by `VideoIn`, in seconds.
_FRAME_INTERVAL_SECONDS = 1.0


class VideoMode(enum.Enum):
  """Video mode for the VideoIn processor."""

  # Frames are read from the default camera through OpenCV.
  CAMERA = "camera"
  # Frames are screenshots grabbed through the optional `mss` package.
  SCREEN = "screen"


def _get_single_camera_frame(
    cap: cv2.VideoCapture, substream_name: str
) -> ProcessorPart:
  """Reads one frame from the camera and wraps it in a ProcessorPart.

  This call blocks on `cap.read()`; `VideoIn` runs it in a worker thread.

  Args:
    cap: An OpenCV capture object to read the frame from.
    substream_name: The substream name to attach to the returned part.

  Returns:
    A ProcessorPart holding the frame as a PIL image, with role "USER".

  Raises:
    RuntimeError: If OpenCV reports that no frame could be read.
  """
  # Read the frame queue
  ret, frame = cap.read()
  if not ret:
    raise RuntimeError("Couldn't captrue a frame.")
  # OpenCV delivers frames in BGR channel order while PIL expects RGB.
  # Without this conversion the resulting image has a blue tint.
  frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
  img = PIL.Image.fromarray(frame_rgb)
  # NOTE(review): presumably the format tag tells ProcessorPart how to encode
  # the image - confirm against content_api.
  img.format = "JPEG"
  return ProcessorPart(img, substream_name=substream_name, role="USER")


def _get_single_screen_frame(substream_name: str) -> ProcessorPart:
  """Grabs one screenshot and wraps it in a ProcessorPart.

  `mss` is an optional dependency, so it is imported lazily here rather than
  at module import time.

  Args:
    substream_name: The substream name to attach to the returned part.

  Returns:
    A ProcessorPart holding the screenshot as a PIL image, with role "USER".

  Raises:
    ImportError: If the `mss` package is not installed.
  """
  try:
    # Import the package, not the factory function: `from mss import mss`
    # binds the function itself, on which `.mss()` does not exist.
    import mss  # pytype: disable=import-error # pylint: disable=g-import-not-at-top
  except ImportError as e:
    raise ImportError(
        "Please install mss package using 'pip install mss'"
    ) from e
  # A fresh instance is created and closed on every call, so it never crosses
  # threads (this function runs via asyncio.to_thread) and its OS handles are
  # released instead of leaking once per frame.
  with mss.mss() as sct:
    # Per the mss documentation, monitors[0] is the area covering all monitors.
    monitor = sct.monitors[0]
    shot = sct.grab(monitor)
    img = PIL.Image.frombuffer("RGB", shot.size, shot.rgb)
  # NOTE(review): presumably the format tag tells ProcessorPart how to encode
  # the image - confirm against content_api.
  img.format = "JPEG"
  return ProcessorPart(img, substream_name=substream_name, role="USER")


@processor.source
async def VideoIn(  # pylint: disable=invalid-name
    substream_name: str = "realtime", video_mode: VideoMode = VideoMode.CAMERA
) -> AsyncIterable[ProcessorPart]:
  """Yields image parts from a camera or a computer screen.

  One frame is produced, then the generator sleeps for
  `_FRAME_INTERVAL_SECONDS` before capturing the next one. The loop never
  ends on its own; it stops when the coroutine is cancelled or a capture
  fails.

  Args:
    substream_name: The name of the substream to use for the generated images.
    video_mode: The video mode to use for the video. Can be CAMERA or SCREEN.

  Yields:
    ProcessorPart objects, each holding a single captured frame.

  Raises:
    ValueError: If `video_mode` is neither CAMERA nor SCREEN.
    RuntimeError: In CAMERA mode, if a frame cannot be read.
    ImportError: In SCREEN mode, if the `mss` package is not installed.
  """
  if video_mode == VideoMode.CAMERA:
    # This takes about a second, and will block the whole program
    # causing the audio pipeline to overflow if you don't to_thread it.
    cap = await asyncio.to_thread(
        cv2.VideoCapture, 0
    )  # 0 represents the default camera
    try:
      # The coroutine will be cancelled when we are done, breaking the loop.
      while True:
        yield await asyncio.to_thread(
            _get_single_camera_frame, cap, substream_name
        )
        await asyncio.sleep(_FRAME_INTERVAL_SECONDS)
    finally:
      # Release the VideoCapture object
      cap.release()
  elif video_mode == VideoMode.SCREEN:
    while True:
      yield await asyncio.to_thread(_get_single_screen_frame, substream_name)
      await asyncio.sleep(_FRAME_INTERVAL_SECONDS)
  else:
    raise ValueError(f"Unsupported video mode: {video_mode}")