in florence2-VQA/src_serve/score.py [0:0]
def run_example_base64(task_prompt, text_input, base64_image, params):
    max_new_tokens = params["max_new_tokens"]
    num_beams = params["num_beams"]

    # Decode the base64 payload into a PIL image and build the full prompt
    # by prepending the task token to the question text
    image = Image.open(BytesIO(base64.b64decode(base64_image)))
    prompt = task_prompt + text_input

    # Ensure the image is in RGB mode
    if image.mode != "RGB":
        image = image.convert("RGB")

    # Tokenize the prompt, preprocess the image, and move tensors to the target device
    inputs = processor(text=prompt, images=image, return_tensors="pt").to(device)
    generated_ids = model.generate(
        input_ids=inputs["input_ids"],
        pixel_values=inputs["pixel_values"],
        max_new_tokens=max_new_tokens,
        num_beams=num_beams
    )

    # Decode without stripping special tokens so post-processing can parse task markers,
    # then convert the raw generated text into a structured answer for the given task
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
    parsed_answer = processor.post_process_generation(
        generated_text, task=task_prompt, image_size=(image.width, image.height)
    )
    return parsed_answer
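
A minimal usage sketch follows. It assumes score.py has already initialized the global `model`, `processor`, and `device` (for example in an init hook of the serving script); the `<VQA>` task token, the file name, and the parameter values below are illustrative assumptions, not values taken from the source.

# Hypothetical usage sketch -- not part of score.py.
# Assumes model, processor, and device are already set up by the serving code.
import base64

with open("sample.jpg", "rb") as f:  # hypothetical local test image
    b64_image = base64.b64encode(f.read()).decode("utf-8")

params = {"max_new_tokens": 512, "num_beams": 3}  # illustrative generation settings
answer = run_example_base64(
    task_prompt="<VQA>",  # assumed task token for the VQA fine-tune
    text_input="What is shown in the image?",
    base64_image=b64_image,
    params=params,
)
print(answer)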