def _reset()

in MTRF/r3l/r3l/r3l_envs/inhand_env/multi_phase.py


    def _reset(self):
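        """Phase-aware reset.

        Either fully resets the active environment (when `_should_reset()`
        allows it) or performs a soft reset that keeps the simulation state,
        updates the phase, and forces a reset if we appear to be stuck.
        """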
        self.phase_old = self.phase
        # TODO: Check the hand is reset properly
        if self._should_reset():
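            # Full reset: reinitialize the active environment, update the
            # phase, and restart the per-phase episode counter.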
            self.active_env.reset()
            # if self._reset_count > 0:
            self.update_phase()
            self.phase_cnt = 0
        else:
            # NOTE: Letting go of the object and allowing the simulation to step
            # is important to get the simulation to a steady state, ready for
            # the next `update_phase` call.
            if self._drop_object_on_pickup and self.get_obs_dict()["object_xyz"][2] > 0.85:
                self._release_object()

            # if self._reset_count > 0:
            self.update_phase()
            if self.phase != self.phase_old:
                self.phase_cnt = 0
                self.phase_old = self.phase
            else:
                # Count how many consecutive episodes we have spent in
                # this phase.
                self.phase_cnt += 1

                # If we have been stuck in this phase (or the object has been
                # stuck) for too long, force a reset and move on. Even if the
                # task is being solved, this is important to improve all phases.
                if self.phase_cnt >= self._max_episodes_in_phase:
                    if self._verbose:
                        print("Stuck in a phase, requesting forced reset")
                    # TODO: Not sure which reset to use here.
                    self.active_env.reset()
                    # TODO(justinvyu): I think this extra update_phase screws things up in logging
                    # self.update_phase()
                    self.phase_cnt = 0

            # Use a loaded policy to run through the phase, if we aren't
            # interested in training for it.
            # if self.phase not in self._training_phases:
            #     self.execute_phase(self.phase, self.phase_cnt)
            #     # Update the phase with the same logic as above
            #     if self.last_obs_dict["object_xyz"][2] > 0.85:  # drop object if picked
            #         if self._verbose:
            #             print("Dropping object")
            #         self.do_simulation(self.act_mid - 0.75 * self.act_rng, 300)
            #     self.phase_old = self.phase
            #     self.update_phase()
            #     self.phase_cnt = 0

            """
            If we specify a maximum number of episodes stuck, then keep track
            of the last `self._max_episodes_stuck` object positions. If the
            difference of the current position is less than a certain threshold
            `self._stuck_radius` relative to the some number of episodes ago,
            then do a full reset.
            """
            if 0 < self._max_episodes_stuck < np.inf:
                obj_xyz = self.get_obs_dict()["object_xyz"]
                self._obj_xyz_history.append(obj_xyz)
                self._object_xy_range = 0
                # Only check once we have seen `_max_episodes_stuck` positions
                # (the history buffer holds at most that many entries)
                if len(self._obj_xyz_history) == self._max_episodes_stuck:
                    xy_history = np.array(self._obj_xyz_history)[:, :2]
                    # Use the max L2 distance from the latest position to any
                    # position within the last `max_episodes_stuck` timesteps
                    xy_range = self._object_xy_range = np.max(
                        np.linalg.norm(xy_history - xy_history[-1], axis=1)
                    )
                    # Use standard deviation to measure movement
                    # xyz_spread = np.array(self._obj_xyz_history)[:, :2].std(axis=0).mean()
                    if xy_range < self._stuck_radius:
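                        # Force a full reset and clear the window so the
                        # check does not immediately re-trigger.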
                        self._stuck_reset_count += 1
                        self.active_env.reset()
                        self._obj_xyz_history.clear()

            # Run the reset policy if one exists
            if self._should_reset():
                self.active_env.run_reset_policy()

            # Always reset the robot; the active env handles the case where
            # the robot should not be reset.
            self.active_env.reset_robot()

        if hasattr(self.active_env, "reset_target"):
            self.active_env.reset_target()

        if self._verbose:
            print("Current Phase: {}, Episodes since last phase change: {}".format(self.phase, self.phase_cnt))