in vilbert/datasets/vqa_mc_dataset.py [0:0]
def _load_dataset(dataroot, name):
    """Load the aligned question / option / answer entries for one data split.

    dataroot: root path of dataset (holds the question and annotation JSON files)
    name: 'train', 'val', 'trainval', 'minval' or 'test'

    Split composition (all lists are sorted by "question_id" before slicing):
      * 'train' / 'val': the corresponding 2014 files as-is.
      * 'trainval': all of train plus val minus its last 3000 items.
      * 'minval': the last 3000 items of val.
      * 'test': the 2015 test questions; no annotations are loaded and each
        entry is created with ``None`` as its answer.

    Option files are read from
    ``results/VQA_bert_base_4layer_4conect-pretrained_finetune/<split>_others.json``
    relative to the current working directory (not ``dataroot``).

    Returns a list with one item per question, each built by ``_create_entry``
    (defined elsewhere in this file).

    Raises AssertionError when ``name`` is not a recognized split. Alignment
    between questions, answers and options is checked through ``assert_eq``
    (defined elsewhere; presumably raises on mismatch).
    """
    options_dir = os.path.join(
        "results", "VQA_bert_base_4layer_4conect-pretrained_finetune"
    )
    # Number of trailing val items held out of 'trainval' and used as 'minval'.
    minval_size = 3000

    def read_sorted(path, field=None):
        # Read a JSON file (closing it afterwards), optionally pick one
        # top-level field, and return its items ordered by question_id.
        with open(path) as handle:
            payload = json.load(handle)
        if field is not None:
            payload = payload[field]
        return sorted(payload, key=lambda item: item["question_id"])

    def load_questions(split, year=2014):
        path = os.path.join(
            dataroot, "v2_OpenEnded_mscoco_%s%d_questions.json" % (split, year)
        )
        return read_sorted(path, "questions")

    def load_answers(split):
        # Annotations live in their own file, separate from the questions file.
        path = os.path.join(dataroot, "v2_mscoco_%s2014_annotations.json" % split)
        return read_sorted(path, "annotations")

    def load_options(split):
        path = os.path.join(options_dir, "%s_others.json" % split)
        return read_sorted(path)

    if name in ("train", "val"):
        questions = load_questions(name)
        answers = load_answers(name)
        options = load_options(name)
    elif name == "trainval":
        questions = load_questions("train") + load_questions("val")[:-minval_size]
        answers = load_answers("train") + load_answers("val")[:-minval_size]
        options = load_options("train") + load_options("val")[:-minval_size]
    elif name == "minval":
        questions = load_questions("val")[-minval_size:]
        answers = load_answers("val")[-minval_size:]
        options = load_options("val")[-minval_size:]
    elif name == "test":
        questions = load_questions("test", year=2015)
        answers = None  # the test split ships without annotations
        options = load_options("test")
    else:
        # Explicit raise (rather than `assert False`) so the check survives
        # `python -O`; the exception type is unchanged for callers.
        raise AssertionError("data split is not recognized.")

    entries = []
    if answers is None:
        for question, option in zip(questions, options):
            assert_eq(question["question_id"], option["question_id"])
            entries.append(_create_entry(question, option, None))
        return entries

    assert_eq(len(questions), len(answers))
    for question, answer, option in zip(questions, answers, options):
        assert_eq(question["question_id"], answer["question_id"])
        assert_eq(question["image_id"], answer["image_id"])
        assert_eq(question["question_id"], option["question_id"])
        entries.append(_create_entry(question, option, answer))
    return entries