in habitat/tasks/rearrange/sub_tasks/pick_sensors.py [0:0]
def update_metric(self, *args, episode, task, observations, **kwargs):
    """Accumulate the pick-task shaped reward into ``self._metric``.

    Reward terms (all coefficients read from ``self._config``):
      * ``PICK_REWARD`` when the target object is grasped,
      * ``WRONG_PICK_PEN`` penalty when any other object is grasped,
      * ``DROP_PEN`` penalty when a held object is released,
      * a distance-shaping term — delta-of-distance when ``USE_DIFF``,
        otherwise a per-step penalty proportional to the distance.

    The episode may be flagged to end early on a wrong pick or a drop
    when the corresponding ``*_SHOULD_END`` config flags are set.
    """
    super().update_metric(
        *args,
        episode=episode,
        task=task,
        observations=observations,
        **kwargs
    )
    # Start from whatever reward the parent measure computed.
    reward = self._metric

    measures = task.measurements.measures
    ee_to_object_distance = measures[
        EndEffectorToObjectDistance.cls_uuid
    ].get_metric()
    ee_to_rest_distance = measures[
        EndEffectorToRestDistance.cls_uuid
    ].get_metric()

    snapped_id = self._sim.grasp_mgr.snap_idx
    is_holding = snapped_id is not None

    # While holding, shape toward the resting pose; otherwise shape
    # toward the target object.
    if is_holding:
        dist_to_goal = ee_to_rest_distance
    else:
        dist_to_goal = ee_to_object_distance[task.targ_idx]

    abs_targ_obj_idx = self._sim.scene_obj_ids[task.abs_targ_idx]

    just_picked = is_holding and not self._task.prev_picked
    if just_picked:
        if snapped_id != abs_targ_obj_idx:
            # Grabbed the wrong object: penalize, optionally end the
            # episode, and stop updating this step.
            reward -= self._config.WRONG_PICK_PEN
            if self._config.WRONG_PICK_SHOULD_END:
                self._task.should_end = True
            self._metric = reward
            return
        task.n_succ_picks += 1
        reward += self._config.PICK_REWARD
        # The shaping goal just switched (object -> rest pose), so the
        # cached distance is stale; -1 marks it invalid.
        self.cur_dist = -1

    if self._config.USE_DIFF:
        # Reward progress (reduction in distance); zero on the first
        # step after a goal switch. Rounding filters out tiny
        # simulation jitter.
        dist_diff = 0.0 if self.cur_dist < 0 else self.cur_dist - dist_to_goal
        reward += self._config.DIST_REWARD * round(dist_diff, 3)
    else:
        reward -= self._config.DIST_REWARD * dist_to_goal
    self.cur_dist = dist_to_goal

    if self._prev_picked and not is_holding:
        # The held object was released: penalize, optionally end the
        # episode, and stop updating this step.
        reward -= self._config.DROP_PEN
        if self._config.DROP_OBJ_SHOULD_END:
            self._task.should_end = True
        self._metric = reward
        return

    self._task.prev_picked = is_holding
    self._prev_picked = self._sim.grasp_mgr.snap_idx is not None
    self._metric = reward