video_processing/modules/caption_object_ocr.py (23 lines of code) (raw):

FLORENCE = None def load_florence( hf_hub_or_path="microsoft/Florence-2-large", device="cpu", dtype="float32", check_task_types=True, ): global FLORENCE from florence_tool import FlorenceTool FLORENCE = FlorenceTool( hf_hub_or_path=hf_hub_or_path, device=device, dtype=dtype, check_task_types=check_task_types ) FLORENCE.load_model() def run( image, task_prompt, ): if FLORENCE is None: load_florence() return FLORENCE.run( image=image, task_prompt=task_prompt, )