in grolp/eval/eval_task.py [0:0]
def evaluate(cls, env: ThorEnv, predictor: AlfredPredictor, r_idx: int, image_loader: CustomImageLoader,
             region_detector: MaskRCNNDetector, traj_data, args, lock, successes, failures, results):
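    # state carried across time steps: the model's action/state memories, cached language
    # features/masks, and the previously predicted action and object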
    action_mems = None
    state_mems = None
    language_features = None
    language_masks = None
    prev_actions = None
    prev_objects = None
    instruction_idx = 0 if predictor.is_split_model else None
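    # optionally keep high-resolution frames on disk so the episode can be rendered as a video at the end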
    if args.save_video_path is not None:
        high_res_images_dir = os.path.join(
            os.path.dirname(args.model_path),
            args.save_video_path,
            traj_data["split"],
            traj_data["task_type"],
            traj_data["task_id"],
            str(traj_data["repeat_idx"])
        )
        if not os.path.exists(high_res_images_dir):
            os.makedirs(high_res_images_dir)
    else:
        high_res_images_dir = None
    # setup scene
    reward_type = 'dense'
    cls.setup_scene(env, traj_data, r_idx, args, reward_type=reward_type)

    # goal instr
    goal_instr = traj_data['turk_annotations']['anns'][r_idx]['task_desc']
    high_descs = traj_data['turk_annotations']['anns'][r_idx]["high_descs"]

    done, success = False, False
    fails = 0
    t = 0
    reward = 0
    action_stats = {}
    prev_action = "Start"
    pred_trajectory = []
    prev_image = None
    nav_actions = ['MoveAhead_25', 'RotateLeft_90', 'RotateRight_90', 'LookDown_15', 'LookUp_15']
    if high_res_images_dir is not None:
        image_to_save = env.last_event.frame[:, :, ::-1]
        save_image(image_to_save, high_res_images_dir)

    # used to determine whether the previous action was executed successfully
    is_action_ok = True
    pred_action = None
    pred_mask = None
    output_dict = None
    action_mask = None
    object_mask = None
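    # main interaction loop: runs until the model predicts <<stop>>, max_steps is reached,
    # or too many actions fail (max_fails)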
    while not done:
        # break if max_steps reached
        if t >= args.max_steps:
            break

        # check whether the previous action was successful
        if is_action_ok:
            # the previous action succeeded, so we simply predict and execute the next action
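            # run the object detector on the current frame to extract visual features for the model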
            object_features = cls.get_visual_features(
                env,
                image_loader,
                region_detector,
                args,
                predictor.cuda_device
            )
            num_objects_in_front = object_features[0]["num_objects"]
            # reinitialise the masks used for action replay: when an action fails, these masks
            # restrict the next prediction to actions/objects that have not been attempted yet
            action_mask, object_mask = predictor.init_masks(num_objects_in_front)

            if args.debug and instruction_idx is not None and instruction_idx < len(high_descs):
                print(f"Instruction ({instruction_idx}/{len(high_descs)}): {high_descs[instruction_idx]}")

            instance = predictor.featurize(traj_data, object_features, instruction_idx)

            # forward model
            output_dict = predictor.predict_instance(
                instance,
                action_mems=action_mems,
                state_mems=state_mems,
                language_features=language_features,
                language_masks=language_masks,
                prev_actions=prev_actions,
                prev_objects=prev_objects,
                num_objects_in_front=num_objects_in_front
            )[0]
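            # unpack the predictions and carry the recurrent state over to the next step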
            pred_action = output_dict["actions"]
            pred_mask = output_dict["masks"]
            action_mems = output_dict["action_mems"]
            state_mems = output_dict["state_mems"]
            prev_objects = output_dict.get("prev_objects")
            prev_actions = output_dict["prev_actions"].unsqueeze(0)
            if prev_objects is not None:
                prev_objects = prev_objects.unsqueeze(0)

            goto_next_instruction = output_dict["goto_next_instruction"]
            if goto_next_instruction and predictor.is_split_model:
                # move on to the next instruction when the model predicts 'go next'
                if instruction_idx < len(high_descs):
                    # increase the index only when possible to avoid out-of-bounds errors
                    instruction_idx += 1
                # also reset the language features so that the model recomputes them for the new instruction
                language_masks = None
                language_features = None
            else:
                # otherwise reuse the cached language features
                language_features = output_dict["language_features"].unsqueeze(0)
                language_masks = output_dict["language_masks"].unsqueeze(0)
        else:
            # the previous action failed, so we fix the prediction from the previous step;
            # the visual features don't need to be recomputed because the frame hasn't changed
            if pred_action in nav_actions:
                # the failed action was a navigation action: inspect the probability
                # distribution and pick the next best action that hasn't been tried yet
                pred_action_idx = predictor._model.vocab.get_token_index(pred_action, "low_action_labels")
                action_mask[pred_action_idx] = 0
                # mask out the already-attempted actions and take the argmax over the remaining probabilities
                pred_action_idx = np.argmax(output_dict["action_probs"] * action_mask, -1)
                pred_action = predictor._model.vocab.get_token_from_index(pred_action_idx, "low_action_labels")
                prev_actions = output_dict["prev_actions"].unsqueeze(0)
                prev_actions.fill_(pred_action_idx)
            else:
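                # the failed action was an interaction: mask out the previously selected object
                # and pick the next most likely candidate from the object distribution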
                pred_object = output_dict["pred_objects"]
                object_mask[pred_object] = 0
                object_logits = output_dict["object_probs"]
                pred_object = np.argmax(object_logits * object_mask, -1)
                # then we extract the masks for the front view
                object_masks = output_dict["interactive_object_masks"][0]
                pred_mask = object_masks[pred_object].squeeze(0)
                output_dict["pred_objects"] = pred_object
                prev_objects = output_dict.get("prev_objects")
                if prev_objects is not None:
                    prev_objects.fill_(pred_object)
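        # record the (possibly re-selected) action before attempting to execute it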
        pred_trajectory.append(pred_action)

        # check if <<stop>> was predicted
        if pred_action == cls.STOP_TOKEN:
            print("\tpredicted STOP")
            break

        # print action
        if args.debug:
            print(pred_action)

        # use predicted action and mask (if available) to interact with the env
        is_action_ok, _, _, err, _ = env.va_interact(
            pred_action, interact_mask=pred_mask, smooth_nav=args.smooth_nav, debug=args.debug)
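        # bookkeeping: per-action execution statistics reported at the end of the evaluation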
        if pred_action not in action_stats:
            action_stats[pred_action] = dict(
                total=0,
                success=0
            )
        action_stats[pred_action]['total'] += 1

        if not is_action_ok:
            fails += 1
            if fails >= args.max_fails:
                print("Interact API failed %d times; latest error '%s'" % (fails, err))
                break
        else:
            action_stats[pred_action]['success'] += 1

        # next time-step
        t_reward, t_done = env.get_transition_reward()
        reward += t_reward
        t += 1

        if high_res_images_dir is not None:
            image_to_save = env.last_event.frame[:, :, ::-1]
            save_image(image_to_save, high_res_images_dir)
    # check if goal was satisfied
    goal_satisfied = env.get_goal_satisfied()
    if goal_satisfied:
        print("Goal Reached")
        success = True

    # goal_conditions
    pcs = env.get_goal_conditions_met()
    goal_condition_success_rate = pcs[0] / float(pcs[1])

    # SPL: weight success and goal-condition success by expert path length / agent path length, capped at 1
    path_len_weight = len(traj_data['plan']['low_actions'])
    s_spl = (1 if goal_satisfied else 0) * min(1., path_len_weight / float(t))
    pc_spl = goal_condition_success_rate * min(1., path_len_weight / float(t))

    # path length weighted SPL
    plw_s_spl = s_spl * path_len_weight
    plw_pc_spl = pc_spl * path_len_weight
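    # the successes/failures lists and the results dict are shared with other evaluation
    # workers, so updates below are guarded by the lock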
    # log success/fails
    lock.acquire()
    log_entry = {
        'trial': traj_data['task_id'],
        'type': traj_data['task_type'],
        'repeat_idx': int(r_idx),
        'goal_instr': goal_instr,
        'completed_goal_conditions': int(pcs[0]),
        'total_goal_conditions': int(pcs[1]),
        'goal_condition_success': float(goal_condition_success_rate),
        'success_spl': float(s_spl),
        'path_len_weighted_success_spl': float(plw_s_spl),
        'goal_condition_spl': float(pc_spl),
        'path_len_weighted_goal_condition_spl': float(plw_pc_spl),
        'path_len_weight': int(path_len_weight),
        'reward': float(reward),
        'action_stats': action_stats,
        'api_failure_limit': fails >= args.max_fails,
        'pred_traj_length': len(pred_trajectory),
        'pred_trajectory': ','.join(pred_trajectory)
    }
    if 'plan' in traj_data:
        log_entry['gold_traj_length'] = len(traj_data["plan"]["low_actions"])
        log_entry['gold_trajectory'] = ','.join(
            a["discrete_action"]["action"] for a in traj_data["plan"]["low_actions"])

    if success:
        successes.append(log_entry)
    else:
        failures.append(log_entry)
    # overall results
    results['all'] = cls.get_metrics(successes, failures)

    print("-------------")
    print("SR: %d/%d = %.3f" % (
        results['all']['success']['num_successes'],
        results['all']['success']['num_evals'],
        results['all']['success']['success_rate']))
    print("GC: %d/%d = %.3f" % (
        results['all']['goal_condition_success']['completed_goal_conditions'],
        results['all']['goal_condition_success']['total_goal_conditions'],
        results['all']['goal_condition_success']['goal_condition_success_rate']))
    print("PLW SR: %.3f" % (results['all']['path_length_weighted_success_rate']))
    print("PLW GC: %.3f" % (results['all']['path_length_weighted_goal_condition_success_rate']))
    print("-------------")
    if high_res_images_dir is not None:
        saver = VideoSaver()
        saver.save(high_res_images_dir, os.path.join(high_res_images_dir, "video.mp4"))

    print("Action prediction analysis")
    for a_type, a_stats in results['all']['action_stats'].items():
        print(f"- {a_type} execution success rate = "
              f"{a_stats['success_rate']:.3f} ({a_stats['success']}/{a_stats['total']})")
    print("-------------")
    # task type specific results
    task_types = ['pick_and_place_simple', 'pick_clean_then_place_in_recep', 'pick_heat_then_place_in_recep',
                  'pick_cool_then_place_in_recep', 'pick_two_obj_and_place', 'look_at_obj_in_light',
                  'pick_and_place_with_movable_recep']
    for task_type in task_types:
        task_successes = [s for s in list(successes) if s['type'] == task_type]
        task_failures = [f for f in list(failures) if f['type'] == task_type]
        if len(task_successes) > 0 or len(task_failures) > 0:
            results[task_type] = cls.get_metrics(task_successes, task_failures)
        else:
            results[task_type] = {}

    lock.release()