# From vision/data/datasets_processing_scripts/build_concatenation_datasets_sft/build_ds_sft.py [0:0]
def _paste_raven_panels(grid_image, panels, origin_x, origin_y, width, height, border_size):
    """Paste the 3x3 puzzle cells onto ``grid_image``; the last cell gets the question mark."""
    for i in range(3):
        for j in range(3):
            x = origin_x + border_size + i * (width + border_size)
            y = origin_y + border_size + j * (height + border_size)
            # `i` drives the x offset and `j` the y offset, so panel ``3 * i + j`` lands in
            # column i, row j.
            # NOTE(review): if `panels` is stored row by row, this shows the matrix transposed.
            # Kept as-is to leave the generated images unchanged -- confirm the intent.
            if (i, j) == (2, 2):
                grid_image.paste(question_mark_image, (x, y))
            else:
                grid_image.paste(panels[3 * i + j], (x, y))


def _draw_raven_grid_lines(draw, origin_x, origin_y, width, height, border_size):
    """Draw the 4 horizontal and 4 vertical black separators of the 3x3 grid."""
    total_width = 4 * border_size + 3 * width
    total_height = 4 * border_size + 3 * height
    for i in range(4):  # Horizontal lines: rows are `height` apart.
        y = origin_y + i * (height + border_size)
        draw.rectangle([origin_x, y, origin_x + total_width, y + border_size], fill="black")
    for i in range(4):  # Vertical lines: columns are `width` apart.
        x = origin_x + i * (width + border_size)
        draw.rectangle([x, origin_y, x + border_size, origin_y + total_height], fill="black")


def _draw_raven_frame(draw, left, top, width, height, border_size):
    """Draw a black frame around the ``width`` x ``height`` area whose top-left corner is (left, top)."""
    right, bottom = left + width, top + height
    draw.rectangle([left, top, right, top + border_size], fill="black")  # top edge
    draw.rectangle([left, top, left + border_size, bottom], fill="black")  # left edge
    draw.rectangle([left, bottom - border_size, right, bottom], fill="black")  # bottom edge
    draw.rectangle([right - border_size, top, right, bottom], fill="black")  # right edge


def map_transform_raven(example):
    """Turn one RAVEN example into an image(s) + single-turn text sample.

    Reads ``example["panels"]``, ``example["choices"]`` and ``example["target"]`` and picks one of
    two tasks at random:

    1. Binary task: ``images`` is ``[grid, candidate]`` where ``candidate`` is either the correct
       choice (answer ``"Yes."``) or a random wrong one (answer ``"No."``), each with probability 0.5.
    2. Multiple-choice task: ``images`` is a single canvas holding the grid and every choice
       labelled ``A``, ``B``, ...; the answer is the letter of the target choice.

    The cell size is taken from ``panels[0].size``.
    NOTE(review): all panels and choices are assumed to share that size -- confirm against the data.

    Args:
        example: Mapping with the keys listed above. ``target`` is used as an index into ``choices``.

    Returns:
        The same ``example`` object with ``"images"`` and ``"texts"`` set.
    """
    panels = example["panels"]
    choices = example["choices"]
    target = example["target"]
    border_size = 1
    width, height = panels[0].size
    task = random.randint(1, 2)
    if task == 1:
        # Easier problem: the balanced binary classification problem.
        grid_width = border_size + 3 * (width + border_size)
        grid_height = border_size + 3 * (height + border_size)
        grid_image = Image.new("RGB", (grid_width, grid_height))
        draw = ImageDraw.Draw(grid_image)
        # Panels first, separators second: the separators are drawn over the panel edges.
        _paste_raven_panels(grid_image, panels, 0, 0, width, height, border_size)
        _draw_raven_grid_lines(draw, 0, 0, width, height, border_size)

        instruction = random.choice(prompts_raven_1)
        is_correct = random.random() < 0.5
        if is_correct:
            shown_index = target
        else:
            shown_index = random.choice([i for i in range(len(choices)) if i != target])
        # Draw the frame on a copy so the image stored in example["choices"] is left untouched.
        candidate = choices[shown_index].copy()
        _draw_raven_frame(ImageDraw.Draw(candidate), 0, 0, width, height, border_size)
        example["images"] = [grid_image, candidate]
        example["texts"] = [
            {"user": instruction, "assistant": "Yes." if is_correct else "No.", "source": "RAVEN"}
        ]
    elif task == 2:
        grid_width, grid_height = int(5.25 * (width + border_size)), 7 * (height + border_size)
        grid_image = Image.new("RGB", (grid_width, grid_height), color="white")
        draw = ImageDraw.Draw(grid_image)
        # The puzzle grid is shifted away from the canvas corner; the choices go underneath it.
        offset_x = width
        offset_y = int(height / 2)
        _paste_raven_panels(grid_image, panels, offset_x, offset_y, width, height, border_size)
        _draw_raven_grid_lines(draw, offset_x, offset_y, width, height, border_size)

        # NOTE(review): machine-specific absolute path; ImageFont.truetype raises OSError when the
        # file is missing, so this branch only works where that font is installed.
        font = ImageFont.truetype("/admin/home/victor/.local/share/fonts/Hack-Bold.ttf", size=45)
        # Choices are laid out four per row, each with its letter label above it.
        for k, choice in enumerate(choices):
            x_choice = int(0.25 * width + (k % 4) * 1.25 * width)
            y_choice = int(4 * (height + border_size) + (k // 4) * height * 1.5)
            grid_image.paste(choice, (x_choice, y_choice))
            _draw_raven_frame(draw, x_choice, y_choice, width, height, border_size)
            index_label = chr(ord("A") + k)
            draw.text(
                (x_choice + int(width / 2.1), y_choice - int(height / 3)),
                index_label,
                fill="black",
                font=font,
            )
        instruction = random.choice(prompts_raven_2)
        example["images"] = [grid_image]
        example["texts"] = [{"user": instruction, "assistant": chr(ord("A") + target), "source": "RAVEN"}]
    return example