in MTRF/r3l/r3l/r3l_envs/inhand_env/multi_phase.py [0:0]
def _reset(self):
self.phase_old = self.phase
# TODO: Check that the hand is reset properly
if self._should_reset():
self.active_env.reset()
# if self._reset_count > 0:
self.update_phase()
self.phase_cnt = 0
else:
# NOTE: Letting go of the object and allowing the simulation to step
# is important to get the simulation to a steady state, ready for
# the next `update_phase` call.
if self._drop_object_on_pickup and self.get_obs_dict()["object_xyz"][2] > 0.85:
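# An object height above 0.85 means it is currently picked up (cf. the commented-out drop logic below).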
self._release_object()
# if self._reset_count > 0:
self.update_phase()
if self.phase != self.phase_old:
self.phase_cnt = 0
self.phase_old = self.phase
else:
# Count how many episodes we have been stuck in the same phase
self.phase_cnt += 1
# Force a reset once we have been stuck in a phase (or the object has been
# stuck) for too long; even if the task is being solved, this is important
# for improving all phases.
if self.phase_cnt >= self._max_episodes_in_phase:
if self._verbose:
print("Stuck in a phase, requesting forced reset")
# TODO: Not sure which reset to use here.
self.active_env.reset()
# TODO(justinvyu): I think this extra update_phase screws things up in logging
# self.update_phase()
self.phase_cnt = 0
# Use a loaded policy to run through the phase, if we aren't
# interested in training for it.
# if self.phase not in self._training_phases:
# self.execute_phase(self.phase, self.phase_cnt)
# # Update the phase with the same logic as above
# if self.last_obs_dict["object_xyz"][2] > 0.85: # drop object if picked
# if self._verbose:
# print("Dropping object")
# self.do_simulation(self.act_mid - 0.75 * self.act_rng, 300)
# self.phase_old = self.phase
# self.update_phase()
# self.phase_cnt = 0
"""
If we specify a maximum number of episodes stuck, then keep track
of the last `self._max_episodes_stuck` object positions. If the
difference of the current position is less than a certain threshold
`self._stuck_radius` relative to the some number of episodes ago,
then do a full reset.
"""
if 0 < self._max_episodes_stuck < np.inf:
obj_xyz = self.get_obs_dict()["object_xyz"]
self._obj_xyz_history.append(obj_xyz)
# Need to have seen at least `_max_episodes_stuck` positions
self._object_xy_range = 0
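# NOTE: the equality check below assumes `_obj_xyz_history` has a bounded
# length (presumably a deque with maxlen=`_max_episodes_stuck`).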
if len(self._obj_xyz_history) == self._max_episodes_stuck:
xy_history = np.array(self._obj_xyz_history)[:, :2]
# Use the max XY (L2) distance from the latest position to any
# position in the last `_max_episodes_stuck` episodes
xy_range = self._object_xy_range = np.max(
np.linalg.norm(xy_history - xy_history[-1], axis=1)
)
# Use standard deviation to measure movement
# xyz_spread = np.array(self._obj_xyz_history)[:, :2].std(axis=0).mean()
if xy_range < self._stuck_radius:
self._stuck_reset_count += 1
self.active_env.reset()
self._obj_xyz_history.clear()
# Run the reset policy if one exists
if self._should_reset():
self.active_env.run_reset_policy()
# Always reset the robot; the active env handles the case where the
# robot should not be reset.
self.active_env.reset_robot()
if hasattr(self.active_env, "reset_target"):
self.active_env.reset_target()
if self._verbose:
print("Current Phase: {}, Episodes since last phase change: {}".format(self.phase, self.phase_cnt))