# vilbert/datasets/guesswhat_pointing_dataset.py
def _load_annotations(self, clean_datasets):
    """Load GuessWhat?! pointing annotations for the current split.

    Reads the split's ``guesswhat.<split>.jsonl`` dialog file plus two cached
    pickles (per-image box lists and a box-id -> box dict) and builds one
    entry per dialog.

    Args:
        clean_datasets: If truthy, dialogs whose image appears in the cached
            COCO test-id list are removed from the ``train`` split.

    Returns:
        list[dict]: One dict per dialog with keys ``questions``, ``answers``,
        ``dialog_id``, ``image_id``, ``refBox``, ``ref_id`` and ``mc_idx``
        (indices of the dialog's object boxes within the sorted, de-duplicated
        box list of its image).
    """
    entries = []

    # COCO test-image ids: used to de-leak the train split and, inversely,
    # to select the mteval subset. A set makes the per-dialog membership
    # tests O(1) instead of O(n) over a list.
    remove_ids = set()
    if clean_datasets or self.split == "mteval":
        remove_ids = {
            int(x)
            for x in np.load(
                os.path.join(self.dataroot, "cache", "coco_test_ids.npy")
            )
        }

    # Context managers close the pickle files promptly (the previous code
    # leaked the open file handles).
    with open(
        os.path.join(self.dataroot, "cache", "image_bbox_list.pkl"), "rb"
    ) as f:
        all_images = cPickle.load(f)
    with open(
        os.path.join(self.dataroot, "cache", "bboxes_dict.pkl"), "rb"
    ) as f:
        boxes_dict = cPickle.load(f)

    # mteval evaluates on a held-out subset of the *train* annotations.
    jsonl_split = "train" if self.split == "mteval" else self.split
    annotations_path = os.path.join(
        self.dataroot, "guesswhat.%s.jsonl" % jsonl_split
    )

    with jsonlines.open(annotations_path) as reader:
        # Build an index which maps image id with a list of qa annotations.
        for annotation in reader:
            image_id = int(annotation["image"]["id"])
            # train: drop dialogs whose image is in the COCO test set;
            # mteval: keep only dialogs whose image IS in that set.
            if self.split == "train" and image_id in remove_ids:
                continue
            if self.split == "mteval" and image_id not in remove_ids:
                continue

            questions = [q["question"] for q in annotation["qas"]]
            answers = [q["answer"] for q in annotation["qas"]]
            bboxes = [o["id"] for o in annotation["objects"]]

            # Index of each of this dialog's object boxes within the sorted,
            # de-duplicated list of all boxes known for the image.
            total_bboxes = sorted(
                set(all_images[annotation["image"]["id"]]["bboxes"])
            )
            bbox_idx = [total_bboxes.index(a) for a in sorted(bboxes)]

            entries.append(
                {
                    "questions": questions,
                    "answers": answers,
                    "dialog_id": annotation["id"],
                    "image_id": annotation["image"]["id"],
                    "refBox": boxes_dict[annotation["object_id"]],
                    "ref_id": annotation["object_id"],
                    "mc_idx": bbox_idx,
                }
            )
    return entries