in training/data.py [0:0]
def _setup_navigation_target(self, index):
    """Resolve and register the navigation target object and room.

    Used for val/test evaluation and for reinforce training (when
    ``target_obj_conn_map_dir`` is set): matches the ground-truth bounding
    boxes stored in ``self.boxes[index]`` against the loaded environment's
    objects and rooms, then calls ``set_target_object`` so the environment
    loads the connectivity map used for intermediate rewards.

    Side effects: stashes per-episode state on ``self``
    (``episode_pos_queue``, ``episode_house``, ``target_room``,
    ``target_obj``) — [NOTE] this only works for batch size = 1.
    """
    env = self.env_loaded[self.env_list[index]]
    target_obj_id, target_room = False, False
    # ground-truth bbox of the target object for this question
    bbox_obj = [
        x for x in self.boxes[index]
        if x['type'] == 'object' and x['target'] == True
    ][0]['box']
    # exact bbox match against every object in the environment
    for obj_id in env.objects:
        box2 = env.objects[obj_id]['bbox']
        if all(bbox_obj['min'][i] == box2['min'][i] for i in range(3)) and \
                all(bbox_obj['max'][i] == box2['max'][i] for i in range(3)):
            target_obj_id = obj_id
            break
    # the room bbox recorded (with target == False) for this question
    bbox_room = [
        x for x in self.boxes[index]
        if x['type'] == 'room' and x['target'] == False
    ][0]
    for room in env.env.house.all_rooms:
        if all(room['bbox']['min'][i] == bbox_room['box']['min'][i] for i in range(3)) and \
                all(room['bbox']['max'][i] == bbox_room['box']['max'][i] for i in range(3)):
            target_room = room
            break
    # NOTE(review): `!= False` misfires for a falsy-but-valid id (e.g. 0);
    # kept as in the original since ids/rooms appear to be non-falsy here —
    # confirm against the environment loader.
    assert target_obj_id != False
    assert target_room != False
    env.set_target_object(env.objects[target_obj_id], target_room)
    # [NOTE] only works for batch size = 1
    self.episode_pos_queue = self.pos_queue[index]
    self.episode_house = env
    self.target_room = target_room
    self.target_obj = env.objects[target_obj_id]


def __getitem__(self, index):
    """Fetch one example; the tuple layout depends on ``self.input_type``.

    'ques'          -> (idx, question, answer)
    'ques,image'    -> (idx, question, answer, images, actions_in,
                        actions_out, action_length)
    'cnn'/'cnn+q'   -> (idx, question, answer, img_feats, actions_in,
                        actions_out, action_length)
    'lstm'/'lstm+q' -> 8-tuple ending in mask; the img_feats slot is False
                        for val/test/reinforce (features are computed
                        on-line during evaluation)
    'pacman'        -> 14-tuple of planner/controller training data, or a
                        short (idx, question, answer, actions,
                        action_length) tuple for val/test/reinforce
    """
    # [VQA] question-only
    if self.input_type == 'ques':
        idx = self.idx[index]
        question = self.questions[index]
        answer = self.answers[index]
        return (idx, question, answer)

    # [VQA] question+image
    elif self.input_type == 'ques,image':
        index = self.available_idx[index]
        idx = self.idx[index]
        question = self.questions[index]
        answer = self.answers[index]
        action_length = self.action_lengths[index]
        actions = self.actions[index]
        # inputs are the last num_frames actions; outputs shifted by one
        actions_in = actions[action_length - self.num_frames:action_length]
        actions_out = actions[action_length - self.num_frames + 1:
                              action_length + 1]
        if self.to_cache and index in self.img_data_cache:
            images = self.img_data_cache[index]
        else:
            pos_queue = self.pos_queue[index][
                -self.num_frames:]  # last num_frames (5) frames
            images = self.get_frames(
                self.env_loaded[self.env_list[index]],
                pos_queue,
                preprocess=True)
            if self.to_cache:
                self.img_data_cache[index] = images.copy()
        return (idx, question, answer, images, actions_in, actions_out,
                action_length)

    # [NAV] question+cnn
    elif self.input_type in ['cnn', 'cnn+q']:
        index = self.available_idx[index]
        idx = self.idx[index]
        question = self.questions[index]
        answer = self.answers[index]
        action_length = self.action_lengths[index]
        actions = self.actions[index]
        if self.to_cache and index in self.img_data_cache:
            img_feats = self.img_data_cache[index]
        else:
            pos_queue = self.pos_queue[index]
            images = self.get_frames(
                self.env_loaded[self.env_list[index]],
                pos_queue,
                preprocess=True)
            # NOTE(review): torch.autograd.Variable is deprecated in modern
            # torch; kept to stay consistent with the rest of the codebase
            img_feats = self.cnn(
                Variable(torch.FloatTensor(images)
                         .cuda())).data.cpu().numpy().copy()
            if self.to_cache:
                self.img_data_cache[index] = img_feats
        # for val or test (evaluation), or when target_obj_conn_map_dir is
        # defined (reinforce): load entire shortest-path navigation
        # trajectory and the connectivity map for intermediate rewards
        if self.split in ['val', 'test'
                          ] or self.target_obj_conn_map_dir != False:
            self._setup_navigation_target(index)
            actions_in = actions[:action_length]
            actions_out = actions[1:action_length + 1] - 2
            return (idx, question, answer, img_feats, actions_in,
                    actions_out, action_length)
        # if action_length is n, images.shape[0] is also n;
        # actions[0] is <START>, actions[n] is <END>.
        # grab num_frames consecutive random frames
        # [NOTE]: this'll break for longer-than-5 navigation sequences
        start_idx = np.random.choice(img_feats.shape[0] + 1 -
                                     self.num_frames)
        img_feats = img_feats[start_idx:start_idx + self.num_frames]
        actions_in = actions[start_idx:start_idx + self.num_frames]
        actions_out = actions[start_idx + self.num_frames] - 2
        return (idx, question, answer, img_feats, actions_in, actions_out,
                action_length)

    # [NAV] question+lstm
    elif self.input_type in ['lstm', 'lstm+q']:
        index = self.available_idx[index]
        idx = self.idx[index]
        question = self.questions[index]
        answer = self.answers[index]
        action_length = self.action_lengths[index]
        actions = self.actions[index]
        if self.split == 'train':
            if self.to_cache and index in self.img_data_cache:
                img_feats = self.img_data_cache[index]
            else:
                pos_queue = self.pos_queue[index]
                images = self.get_frames(
                    self.env_loaded[self.env_list[index]],
                    pos_queue,
                    preprocess=True)
                raw_img_feats = self.cnn(
                    Variable(torch.FloatTensor(images)
                             .cuda())).data.cpu().numpy().copy()
                # zero-pad features up to the max action-sequence length
                img_feats = np.zeros(
                    (self.actions.shape[1], raw_img_feats.shape[1]),
                    dtype=np.float32)
                img_feats[:raw_img_feats.shape[0], :] = raw_img_feats.copy()
                if self.to_cache:
                    self.img_data_cache[index] = img_feats
        # teacher-forced inputs/targets; inputs offset by 1, targets by 2
        # (presumably vocabulary offsets — TODO confirm against the model)
        actions_in = actions.clone() - 1
        actions_out = actions[1:].clone() - 2
        actions_in[action_length:].fill_(0)
        # mask computed before padding so padded steps stay masked out
        mask = actions_out.clone().gt(-1)
        if len(actions_out) > action_length:
            actions_out[action_length:].fill_(0)
        # for val or test (evaluation), or when target_obj_conn_map_dir is
        # defined (reinforce): load entire shortest-path navigation
        # trajectory and the connectivity map for intermediate rewards
        if self.split in ['val', 'test'
                          ] or self.target_obj_conn_map_dir != False:
            self._setup_navigation_target(index)
            # img_feats are computed on-line during evaluation
            return (idx, question, answer, False, actions_in, actions_out,
                    action_length, mask)
        return (idx, question, answer, img_feats, actions_in, actions_out,
                action_length, mask)

    # [NAV] planner-controller
    elif self.input_type in ['pacman']:
        index = self.available_idx[index]
        idx = self.idx[index]
        question = self.questions[index]
        answer = self.answers[index]
        action_length = self.action_lengths[index]
        actions = self.actions[index]
        planner_actions = self.planner_actions[index]
        controller_actions = self.controller_actions[index]
        planner_action_length = self.planner_action_lengths[index]
        controller_action_length = self.controller_action_lengths[index]
        planner_hidden_idx = self.planner_hidden_idx[index]
        if self.split == 'train':
            if self.to_cache and index in self.img_data_cache:
                img_feats = self.img_data_cache[index]
            else:
                pos_queue = self.pos_queue[index]
                images = self.get_frames(
                    self.env_loaded[self.env_list[index]],
                    pos_queue,
                    preprocess=True)
                raw_img_feats = self.cnn(
                    Variable(torch.FloatTensor(images)
                             .cuda())).data.cpu().numpy().copy()
                # zero-pad features up to the max action-sequence length
                img_feats = np.zeros(
                    (self.actions.shape[1], raw_img_feats.shape[1]),
                    dtype=np.float32)
                img_feats[:raw_img_feats.shape[0], :] = raw_img_feats.copy()
                if self.to_cache:
                    self.img_data_cache[index] = img_feats
        if self.split in ['val', 'test'
                          ] or self.target_obj_conn_map_dir != False:
            self._setup_navigation_target(index)
            return (idx, question, answer, actions, action_length)
        planner_pos_queue_idx = self.planner_pos_queue_idx[index]
        controller_pos_queue_idx = self.controller_pos_queue_idx[index]
        # scatter flat image features onto the planner's time steps
        planner_img_feats = np.zeros(
            (self.actions.shape[1], img_feats.shape[1]), dtype=np.float32)
        planner_img_feats[:planner_action_length] = img_feats[
            planner_pos_queue_idx]
        planner_actions_in = planner_actions.clone() - 1
        planner_actions_out = planner_actions[1:].clone() - 2
        planner_actions_in[planner_action_length:].fill_(0)
        planner_mask = planner_actions_out.clone().gt(-1)
        if len(planner_actions_out) > planner_action_length:
            planner_actions_out[planner_action_length:].fill_(0)
        # scatter flat image features onto the controller's time steps
        controller_img_feats = np.zeros(
            (self.actions.shape[1], img_feats.shape[1]), dtype=np.float32)
        controller_img_feats[:controller_action_length] = img_feats[
            controller_pos_queue_idx]
        # controller conditions on the executed low-level actions
        controller_actions_in = actions[1:].clone() - 2
        if len(controller_actions_in) > controller_action_length:
            controller_actions_in[controller_action_length:].fill_(0)
        # NOTE(review): no .clone() here, so fill_(0) below mutates the
        # stored self.controller_actions[index] tensor in place — probably
        # harmless (only padding is zeroed) but worth confirming
        controller_out = controller_actions
        controller_mask = controller_out.clone().gt(-1)
        if len(controller_out) > controller_action_length:
            controller_out[controller_action_length:].fill_(0)
        # zero out forced controller return: after max_controller_actions
        # consecutive continue-steps the following 0 is forced, so it should
        # not contribute to the loss
        for i in range(controller_action_length):
            if i >= self.max_controller_actions - 1 and controller_out[i] == 0 and \
                    (self.max_controller_actions == 1 or
                     controller_out[i - self.max_controller_actions + 1:i].sum()
                     == self.max_controller_actions - 1):
                controller_mask[i] = 0
        return (idx, question, answer, planner_img_feats,
                planner_actions_in, planner_actions_out,
                planner_action_length, planner_mask, controller_img_feats,
                controller_actions_in, planner_hidden_idx, controller_out,
                controller_action_length, controller_mask)