in deepseek_vl2/serve/app_modules/utils.py [0:0]
def parse_ref_bbox(response, image: Image.Image):
    """Draw the boxes referenced in a model response onto a copy of ``image``.

    The response is scanned for ``<|ref|>label<|/ref|>`` spans and
    ``<|det|>[[x0, y0, x1, y1], ...]<|/det|>`` spans. The i-th ref is paired
    with the i-th det, and every box inside a det span receives that ref's
    label. Coordinates are divided by 999 and multiplied by the image width
    (x) or height (y) to obtain pixel positions.

    Args:
        response: Text to scan for ref/det tags.
        image: Source image. It is copied; the caller's image is not drawn on.

    Returns:
        The annotated copy, or ``None`` when the response contains no ref
        tags, when the number of ref and det spans differ, or when anything
        fails while parsing or drawing (this function is best-effort and
        does not raise ordinary exceptions).
    """
    # Local import so the block does not depend on a file-level import of ast.
    import ast

    try:
        refs = re.findall(r'<\|ref\|>.*?<\|/ref\|>', response)
        dets = re.findall(r'<\|det\|>.*?<\|/det\|>', response)
        # Explicit check instead of `assert`: asserts vanish under `python -O`,
        # after which zip() would silently pair up a truncated list.
        if not refs or len(refs) != len(dets):
            return None

        boxes, labels = [], []
        for det, ref in zip(dets, refs):
            label = ref.replace('<|ref|>', '').replace('<|/ref|>', '')
            # Drop the tags, then the outer pair of brackets, leaving the
            # individual "[x0, y0, x1, y1]" groups.
            inner = det.replace('<|det|>', '').replace('<|/det|>', '')[1:-1]
            for one_box in re.findall(r'\[.*?\]', inner):
                # The response text is untrusted, so parse it as a literal
                # rather than evaluating it as arbitrary Python.
                boxes.append(ast.literal_eval(one_box))
                labels.append(label)

        image = image.copy()
        if not boxes:
            # Tags were present but carried no coordinates: nothing to draw,
            # and the font file is deliberately not touched in this case.
            return image

        image_w, image_h = image.size
        draw = ImageDraw.Draw(image)
        # Loaded once for all labels. The path is relative, so it resolves
        # against the process's current working directory.
        font = ImageFont.truetype("deepseek_vl2/serve/assets/simsun.ttc", size=20)
        box_width = 3
        num_colors = len(BOX2COLOR)

        for index, (coords, label) in enumerate(zip(boxes, labels)):
            pixel_box = (
                int(coords[0] / 999 * image_w),
                int(coords[1] / 999 * image_h),
                int(coords[2] / 999 * image_w),
                int(coords[3] / 999 * image_h),
            )
            # Cycle through the palette so each box/label pair shares a color.
            color = BOX2COLOR[index % num_colors]
            draw.rectangle(pixel_box, outline=color, width=box_width)
            # Label is placed 20 px above the box's top-left corner.
            draw.text((pixel_box[0], pixel_box[1] - 20), label, font=font, fill=color)

        return image
    except Exception:
        # Best-effort rendering: malformed coordinates, a missing font file,
        # or a bad image all result in "no annotated image" for the caller.
        return None