def parse_ref_bbox()

in deepseek_vl2/serve/app_modules/utils.py [0:0]


def parse_ref_bbox(response, image: Image.Image):
    """Draw the boxes referenced in ``response`` onto a copy of ``image``.

    ``response`` is scanned for ``<|ref|>label<|/ref|>`` and
    ``<|det|>[[x1, y1, x2, y2], ...]<|/det|>`` spans.  The i-th ref is paired
    with the i-th det, and every box inside a det span gets that ref's label.
    Each coordinate is divided by 999 and multiplied by the image width (x)
    or height (y) before drawing, i.e. the text is presumably expected to
    carry coordinates normalised to the 0..999 range.

    Args:
        response: Text that may contain ref/det tags.
        image: Source image; it is never modified, drawing happens on a copy.

    Returns:
        A new image with the outlined boxes and their labels, or ``None``
        when ``response`` contains no ref tags, when the number of ref and
        det spans differ, or when parsing/drawing fails for any reason
        (malformed box, missing font file, ...).
    """
    # Imported locally so this function does not depend on the file's
    # top-level import block also providing ``ast``.
    import ast

    try:
        refs = re.findall(r'<\|ref\|>.*?<\|/ref\|>', response)
        dets = re.findall(r'<\|det\|>.*?<\|/det\|>', response)

        # Explicit check instead of ``assert``: asserts vanish under ``-O``
        # and ``zip`` below would then silently drop unmatched entries.
        if len(refs) != len(dets):
            return None
        if not refs:
            return None

        boxes, labels = [], []
        for det, ref in zip(dets, refs):
            label = ref.replace('<|ref|>', '').replace('<|/ref|>', '')
            # Remove the tags, then the outer brackets of the list of boxes,
            # leaving a sequence of "[x1, y1, x2, y2]" items.
            inner = det.replace('<|det|>', '').replace('<|/det|>', '')[1:-1]
            for one_box in re.findall(r'\[.*?\]', inner):
                # SECURITY: the response is generated text, so it must never
                # reach eval(); literal_eval only accepts Python literals and
                # raises on anything else (handled below).
                boxes.append(ast.literal_eval(one_box))
                labels.append(label)

        canvas = image.copy()
        image_w, image_h = canvas.size
        draw = ImageDraw.Draw(canvas)

        if not boxes:
            # Nothing to draw; skip loading the font entirely.
            return canvas

        # Loop-invariant: load the label font once rather than once per box.
        font = ImageFont.truetype("deepseek_vl2/serve/assets/simsun.ttc", size=20)
        box_width = 3

        for index, (box, label) in enumerate(zip(boxes, labels)):
            # Rescale the coordinates to pixel positions.
            pixel_box = (
                int(box[0] / 999 * image_w),
                int(box[1] / 999 * image_h),
                int(box[2] / 999 * image_w),
                int(box[3] / 999 * image_h),
            )

            # Cycle through the colour table when there are more boxes than
            # colours.
            box_color = BOX2COLOR[index % len(BOX2COLOR)]
            draw.rectangle(pixel_box, outline=box_color, width=box_width)

            # The label is placed 20px above the top-left corner of the box,
            # in the same colour as the outline.
            text_x = pixel_box[0]
            text_y = pixel_box[1] - 20
            draw.text((text_x, text_y), label, font=font, fill=box_color)

        return canvas
    except Exception:
        # Best-effort rendering: any failure means "no annotated image".
        # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt and
        # SystemExit propagate.
        return None