in grolp/eval/eval_subgoals.py [0:0]
def evaluate(cls,
             env: ThorEnv, predictor: AlfredPredictor, gold_forward_out: Dict[str, Any],
             eval_idx: int, r_idx: int, image_loader: CustomImageLoader,
             region_detector: MaskRCNNDetector, traj_data, args, lock, successes, failures, results):
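    """Evaluate a single subgoal of one trajectory.

    The expert demonstration is replayed up to (but excluding) subgoal `eval_idx`,
    then the model is rolled out from that state until the subgoal is completed,
    a terminal token is predicted, or the step / failure budget is exhausted.
    Per-subgoal statistics are accumulated into the shared `successes`, `failures`
    and `results` structures under `lock`.
    """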
    action_mems = None
    state_mems = None
    language_features = None
    language_masks = None
    prev_actions = None
    prev_objects = None

    # setup scene
    reward_type = 'dense'
    cls.setup_scene(env, traj_data, r_idx, args, reward_type=reward_type)
    # expert demonstration to reach eval_idx - 1
    expert_init_actions = [a['discrete_action'] for a in traj_data['plan']['low_actions']
                           if a['high_idx'] < eval_idx]

    # subgoal info
    subgoal_action = traj_data['plan']['high_pddl'][eval_idx]['discrete_action']['action']
    subgoal_instr = traj_data['turk_annotations']['anns'][r_idx]['high_descs'][eval_idx]

    # print subgoal info
    print("Evaluating: %s\nSubgoal %s (%d)\nInstr: %s"
          % (traj_data['root'], subgoal_action, eval_idx, subgoal_instr))

    nav_actions = ['MoveAhead_25', 'RotateLeft_90', 'RotateRight_90', 'LookDown_15', 'LookUp_15']

    # extract language features
    # feat = model.featurize([(traj_data, False)], load_mask=False)
    # TODO: load gold trajectory data for expert unrolling
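    # rollout bookkeeping: step and failure counters, cumulative reward, and per-step
    # caches (predicted action/mask, last model outputs, replay masks)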
    done, subgoal_success = False, False
    fails = 0
    t = 0
    reward = 0
    is_action_ok = True
    pred_action = None
    pred_mask = None
    output_dict = None
    action_mask = None
    object_mask = None
    goal_instr = traj_data['turk_annotations']['anns'][r_idx]['task_desc']
    high_descs = traj_data['turk_annotations']['anns'][r_idx]["high_descs"]
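    # rollout loop: replay the expert demonstration for the first len(expert_init_actions)
    # steps, then let the model act until the subgoal is completed, a terminal token is
    # predicted, or the step / failure budget is exhausted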
    while not done:
        # break if max_steps reached
        if t >= args.max_steps + len(expert_init_actions):
            break

        # expert teacher-forcing up to the evaluated subgoal
        if t < len(expert_init_actions):
            # get expert action
            action = expert_init_actions[t]
            subgoal_completed = traj_data['plan']['low_actions'][t + 1]['high_idx'] != \
                traj_data['plan']['low_actions'][t]['high_idx']
            compressed_mask = action['args']['mask'] if 'mask' in action['args'] else None
            mask = env.decompress_mask(compressed_mask) if compressed_mask is not None else None

            # execute expert action
            success, _, _, err, _ = env.va_interact(action['action'], interact_mask=mask,
                                                    smooth_nav=args.smooth_nav, debug=args.debug)
            if not success:
                print("expert initialization failed")
                break

            # update transition reward
            _, _ = env.get_transition_reward()
            prev_actions = torch.tensor(
                [[predictor._model.vocab.get_token_index(action['action'], "low_action_labels")]],
                dtype=torch.long)
        # subgoal evaluation
        else:
            # TODO: run model with gold sub-trajectory and get memories
            # we do this only the first time, i.e. right after the expert replay ends
            if t == len(expert_init_actions):
                # at this time step we have completely unrolled the expert demonstration,
                # so we run the model with the entire gold trajectory up to the current subgoal
                instruction_idx = eval_idx if predictor.is_split_model else None
                if t != 0:
                    # initialise the previous memories from the gold forward pass and shift them
                    # so that they line up with the expert-replay prefix
                    action_mems = []
                    state_mems = []
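                    # the gold memories cover the full gold trajectory: keep only the prefix that
                    # matches the expert replay and left-pad with zeros so the time dimension
                    # still equals gold_forward_out["trajectory_len"]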
                    remove_steps = gold_forward_out["trajectory_len"] - len(expert_init_actions)
                    for am in gold_forward_out["action_mems"]:
                        curr_am = am[:-remove_steps].clone()
                        shape = (remove_steps,) + curr_am.shape[1:]
                        curr_am = torch.cat([curr_am.new_zeros(*shape), curr_am])
                        action_mems.append(curr_am)
                    for sm in gold_forward_out["state_mems"]:
                        curr_sm = sm[:-remove_steps].clone()
                        shape = (remove_steps,) + curr_sm.shape[1:]
                        curr_sm = torch.cat([curr_sm.new_zeros(*shape), curr_sm])
                        state_mems.append(curr_sm)
            # from here on the model is run normally at every step

            # check whether the previous action was successful
            if is_action_ok:
                # in this situation we just want to execute the current predicted action
                object_features = cls.get_visual_features(
                    env,
                    image_loader,
                    region_detector,
                    args,
                    predictor.cuda_device
                )
                num_objects_in_front = object_features[0]["num_objects"]

                # reinitialise the masks used for action replay: when an action fails, these
                # masks restrict the actions / objects that can be re-predicted afterwards
                action_mask, object_mask = predictor.init_masks(num_objects_in_front)

                # if args.debug and instruction_idx < len(high_descs):
                #     print(f"Instruction ({instruction_idx}/{len(high_descs)}): {high_descs[instruction_idx]}")
                instance = predictor.featurize(traj_data, object_features, instruction_idx)

                # forward model
                output_dict = predictor.predict_instance(
                    instance,
                    action_mems=action_mems,
                    state_mems=state_mems,
                    language_features=language_features,
                    language_masks=language_masks,
                    prev_actions=prev_actions,
                    prev_objects=prev_objects,
                    num_objects_in_front=num_objects_in_front
                )[0]
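
                # unpack the step outputs: the predicted low-level action and interaction mask,
                # plus the updated memories and previous-step caches fed back on the next call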
                pred_action = output_dict["actions"]
                pred_mask = output_dict["masks"]
                action_mems = output_dict["action_mems"]
                state_mems = output_dict["state_mems"]
                prev_objects = output_dict.get("prev_objects")
                prev_actions = output_dict.get("prev_actions")
                if prev_objects is not None:
                    prev_objects = prev_objects.unsqueeze(0)

                goto_next_instruction = output_dict["goto_next_instruction"]
                if goto_next_instruction and predictor.is_split_model:
                    # move on to the next instruction when the model predicts 'go next'
                    if instruction_idx < len(high_descs):
                        # increase the index only when possible, to avoid out-of-bounds errors
                        instruction_idx += 1
                    # also reset the language features so that the model recomputes them
                    language_masks = None
                    language_features = None
                else:
                    # otherwise reuse the cached language features
                    language_features = output_dict["language_features"].unsqueeze(0)
                    language_masks = output_dict["language_masks"].unsqueeze(0)
            else:
                # the previous action failed, so we recover from it here; the visual features
                # do not need to be recomputed because the scene has not changed
                if pred_action in nav_actions:
                    # the failed action was a navigation action: mask it out and pick the
                    # next most likely action from the last predicted distribution
                    pred_action_idx = predictor._model.vocab.get_token_index(pred_action, "low_action_labels")
                    action_mask[pred_action_idx] = 0
                    # mask out the actions that were already attempted and re-select
                    pred_action_idx = np.argmax(output_dict["action_probs"] * action_mask, -1)
                    pred_action = predictor._model.vocab.get_token_from_index(pred_action_idx, "low_action_labels")
                    prev_actions = output_dict["prev_actions"].unsqueeze(0)
                    prev_actions.fill_(pred_action_idx)
                else:
                    # the failed action was an interaction: mask out the previously chosen
                    # object and pick the next most likely one
                    pred_object = output_dict["pred_objects"]
                    object_mask[pred_object] = 0
                    object_logits = output_dict["object_probs"]
                    pred_object = np.argmax(object_logits * object_mask, -1)
                    # then extract the mask of the selected object in the front view
                    object_masks = output_dict["interactive_object_masks"][0]
                    pred_mask = object_masks[pred_object].squeeze(0)
                    output_dict["pred_objects"] = pred_object
                    prev_objects = output_dict.get("prev_objects")
                    if prev_objects is not None:
                        prev_objects.fill_(pred_object)
                    prev_actions = output_dict["prev_actions"].unsqueeze(0)
            # debug
            if args.debug:
                print("Pred: ", pred_action)

            # update prev action
            prev_action = str(pred_action)

            if pred_action not in cls.TERMINAL_TOKENS:
                # use predicted action and mask (if provided) to interact with the env
                t_success, _, _, err, _ = env.va_interact(pred_action, interact_mask=pred_mask,
                                                          smooth_nav=args.smooth_nav,
                                                          debug=args.debug)
                # record the interaction outcome; without this the recovery branch above
                # would never be triggered
                is_action_ok = t_success
                if not t_success:
                    fails += 1
                    if fails >= args.max_fails:
                        print("Interact API failed %d times; latest error '%s'" % (fails, err))
                        break

            # next time-step
            t_reward, t_done = env.get_transition_reward()
            reward += t_reward

            # update subgoals
            curr_subgoal_idx = env.get_subgoal_idx()
            if curr_subgoal_idx == eval_idx:
                subgoal_success = True
                break

            # terminal tokens predicted
            if pred_action in cls.TERMINAL_TOKENS:
                print("predicted %s" % pred_action)
                break

        # increment time index
        t += 1
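    # path-length metrics: pl counts only the agent's own steps (the expert replay is
    # excluded); s_spl is an SPL-style success weighted by min(1, expert_pl / pl)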
    # metrics
    pl = float(t - len(expert_init_actions)) + 1  # +1 for last action
    expert_pl = len([ll for ll in traj_data['plan']['low_actions'] if ll['high_idx'] == eval_idx])
    s_spl = (1 if subgoal_success else 0) * min(1., expert_pl / (pl + sys.float_info.epsilon))
    plw_s_spl = s_spl * expert_pl
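    # successes, failures and results appear to be shared across evaluation workers,
    # so all updates below happen while holding the lock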
    # log success/fails
    lock.acquire()

    # reset aggregate results for every subgoal type before recomputing them below
    for sg in cls.ALL_SUBGOALS:
        results[sg] = {
            'sr': 0.,
            'successes': 0.,
            'evals': 0.,
            'sr_plw': 0.
        }
    log_entry = {'trial': traj_data['task_id'],
                 'type': traj_data['task_type'],
                 'repeat_idx': int(r_idx),
                 'subgoal_idx': int(eval_idx),
                 'subgoal_type': subgoal_action,
                 'subgoal_instr': subgoal_instr,
                 'subgoal_success_spl': float(s_spl),
                 'subgoal_path_len_weighted_success_spl': float(plw_s_spl),
                 'subgoal_path_len_weight': float(expert_pl),
                 'reward': float(reward)}
    if subgoal_success:
        sg_successes = successes[subgoal_action]
        sg_successes.append(log_entry)
        successes[subgoal_action] = sg_successes
    else:
        sg_failures = failures[subgoal_action]
        sg_failures.append(log_entry)
        failures[subgoal_action] = sg_failures
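    # aggregate per-subgoal-type success rate (SR) and path-length-weighted SR
    # over everything evaluated so far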
    # save results
    print("-------------")
    subgoals_to_evaluate = list(successes.keys())
    subgoals_to_evaluate.sort()
    for sg in subgoals_to_evaluate:
        num_successes, num_failures = len(successes[sg]), len(failures[sg])
        num_evals = len(successes[sg]) + len(failures[sg])
        if num_evals > 0:
            sr = float(num_successes) / num_evals
            total_path_len_weight = sum([entry['subgoal_path_len_weight'] for entry in successes[sg]]) + \
                sum([entry['subgoal_path_len_weight'] for entry in failures[sg]])
            sr_plw = float(sum([entry['subgoal_path_len_weighted_success_spl'] for entry in successes[sg]]) +
                           sum([entry['subgoal_path_len_weighted_success_spl'] for entry in failures[sg]])) / total_path_len_weight

            results[sg] = {
                'sr': sr,
                'successes': num_successes,
                'evals': num_evals,
                'sr_plw': sr_plw
            }

            print("%s ==========" % sg)
            print("SR: %d/%d = %.3f" % (num_successes, num_evals, sr))
            print("PLW SR: %.3f" % sr_plw)
            print("------------")
    lock.release()