in src/nova_act/impl/run_info_compiler.py [0:0]
def _add_bbox_to_image(image: str, response: str) -> str:
    """Draw the bounding box found in ``response`` onto a base64-encoded image.

    Args:
        image: Base64-encoded image, optionally carrying a data prefix that
            ``_IMAGE_PREFIX_MATCHER`` recognises.
        response: Text searched with ``_BBOX_MATCHER``; the captured groups
            are read as ``top, left, bottom, right`` integers.

    Returns:
        ``image`` unchanged when it is empty or when ``response`` contains no
        bbox. Otherwise a ``data:image/jpeg;base64,...`` string holding the
        image re-encoded as JPEG with a red rectangle drawn on it.
    """
    if not image:
        return image
    # Find the first bbox in the response. Right now there can only ever be one bbox. The agent will only take
    # one action at a time and then observe before taking the next one.
    bbox_match = _BBOX_MATCHER.search(response)
    if not bbox_match:
        return image
    top, left, bottom, right = map(int, bbox_match.groups())
    # Strip the data prefix in the base64 image.
    image_match = _IMAGE_PREFIX_MATCHER.match(image)
    if image_match:
        image = image_match.group(1)
    pil_image = Image.open(io.BytesIO(base64.b64decode(image)))
    # The JPEG writer only accepts a handful of modes; anything else (e.g. RGBA or
    # palette-based PNG screenshots) would make save() raise
    # "OSError: cannot write mode ... as JPEG". Convert those to RGB up front;
    # any alpha channel is discarded by the conversion.
    if pil_image.mode not in ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr"):
        pil_image = pil_image.convert("RGB")
    # Add the rectangle to the image. Pillow expects (x0, y0, x1, y1), i.e.
    # (left, top, right, bottom), which differs from the order in the response.
    draw = ImageDraw.Draw(pil_image)
    draw.rectangle((left, top, right, bottom), outline="red", width=3)
    image_bytes_io = io.BytesIO()
    pil_image.save(image_bytes_io, format="JPEG")
    # Return the modified image with the data prefix.
    return "data:image/jpeg;base64," + base64.b64encode(image_bytes_io.getvalue()).decode("utf-8")