def reset_model()

in MTRF/algorithms/softlearning/environments/gym/mujoco/pusher_2d.py [0:0]


    def reset_model(self):
        """Reset the simulation state according to the configured modes.

        The arm/puck state is chosen by ``self._reset_mode``:
          - "free":         keep the last episode's qpos (no reset).
          - "random":       random arm joint angles (rejection-sampled so the
                            end effector stays within |x|,|y| <= 0.9) plus a
                            random puck position.
          - "random_puck":  random puck position only, arm at init pose.
          - "distribution": sample one state from ``self._init_states`` and
                            recover joint angles from its sin/cos encoding.

        The goal position is chosen by ``self._goal_mode``:
          - "random":            a fixed goal, one of ``self._goals``, or a
                                 uniform sample from the goal x/y ranges,
                                 depending on ``self._num_goals``.
          - "curriculum-radius": a goal sampled within a growing radius
                                 around the current puck position.

        Returns:
            The observation from ``self._get_obs()`` after ``set_state``.

        Raises:
            ValueError: if ``self._reset_mode`` or ``self._goal_mode`` is not
                one of the values above.
        """
        if self._reset_mode == "free":
            # Keep the arm/puck exactly where the previous episode left them.
            qpos = self._last_qpos.copy()
            qvel = self.init_qvel.copy().squeeze()
        elif self._reset_mode == "random":  # random puck pos + random arm angles
            qvel = self.init_qvel.copy().squeeze()
            # NOTE(review): QPOS_* index sets are used to zero entries of
            # qvel here — assumes qpos and qvel indices coincide for the
            # puck/goal joints; confirm against the model definition.
            qvel[self.QPOS_PUCK_INDS] = 0
            qvel[self.QPOS_GOAL_INDS] = 0

            qpos = self.init_qpos.copy()
            # Rejection-sample arm angles until the end effector lies inside
            # the 0.9 x 0.9 workspace box; set_state is needed so MuJoCo
            # recomputes body positions for the get_body_com query.
            valid_arm_pos = False
            while (not valid_arm_pos):
                qpos[self.QPOS_JOINT_INDS] = np.random.uniform(
                    low=(-np.pi, -np.pi*3/4, -np.pi/2),
                    high=(np.pi, np.pi*3/4, np.pi/2)
                )
                self.set_state(np.array(qpos), np.array(qvel))
                eef_pos = self.get_body_com("distal_4")[:2]
                if np.all(np.abs(eef_pos) <= 0.9):
                    valid_arm_pos = True

            qpos[self.QPOS_PUCK_INDS] = np.random.uniform(
                low=(self._puck_initial_x_range[0],
                     self._puck_initial_y_range[0]),
                high=(self._puck_initial_x_range[1],
                      self._puck_initial_y_range[1])
                )

        elif self._reset_mode == "random_puck":  # just randomize puck pos
            qpos = self.init_qpos.copy()

            qpos[self.QPOS_PUCK_INDS] = np.random.uniform(
                low=(self._puck_initial_x_range[0],
                     self._puck_initial_y_range[0]),
                high=(self._puck_initial_x_range[1],
                      self._puck_initial_y_range[1])
                )
            qvel = self.init_qvel.copy().squeeze()
            qvel[self.QPOS_PUCK_INDS] = 0
            qvel[self.QPOS_GOAL_INDS] = 0

        elif self._reset_mode == "distribution":
            # Sample a full initial state from a provided set of states; the
            # joint angles are stored as (sin, cos) pairs in the observation,
            # so arctan2 recovers the angle.
            num_init_states = self._init_states.shape[0]
            rand_index = np.random.randint(num_init_states)
            init_state = self._init_states[rand_index]

            qpos = self.init_qpos.copy()
            qpos[self.QPOS_JOINT_INDS] = np.arctan2(
                init_state[self.OBS_JOINT_SIN_INDS],
                init_state[self.OBS_JOINT_COS_INDS]
            )
            qpos[self.QPOS_PUCK_INDS] = init_state[self.OBS_PUCK_INDS]
            qvel = self.init_qvel.copy().squeeze()
        else:
            raise ValueError("reset mode must be specified correctly")

        if self._goal_mode == "random":
            if self._num_goals == 1:
                qpos[self.QPOS_GOAL_INDS] = self._goals[0]
            elif self._num_goals > 1:
                if self._swap_goal_upon_completion:
                    # Switch to a different goal only once the puck has
                    # reached the current one (within 1 cm).
                    puck_position = self.get_body_com("puck")[:2]
                    goal_position = self.get_body_com("goal")[:2]
                    if np.linalg.norm(puck_position - goal_position) < 0.01:
                        other_goal_indices = [i for i in range(self._num_goals)
                                              if i != self._current_goal_index]
                        self._current_goal_index = np.random.choice(
                            other_goal_indices)
                    # NOTE(review): unlike the else-branch below, qpos is not
                    # updated here after the swap, so the new goal only takes
                    # effect if qpos already carries the goal state (e.g.
                    # "free" reset mode) — confirm this is intentional.
                else:
                    self._current_goal_index = np.random.randint(self._num_goals)
                    qpos[self.QPOS_GOAL_INDS] = self._goals[self._current_goal_index]
            else:
                # _num_goals < 1: sample the goal uniformly from the ranges.
                qpos[self.QPOS_GOAL_INDS] = np.random.uniform(
                    low=(self._goal_x_range[0],
                         self._goal_y_range[0]),
                    high=(self._goal_x_range[1],
                          self._goal_y_range[1])
                )
        elif self._goal_mode == "curriculum-radius":
            # Curriculum: the goal is sampled within a radius of the puck
            # that grows by a fixed increment every reset, clipped to the
            # [-1, 1] arena bounds.
            self.goal_sampling_radius += self._goal_sampling_radius_increment
            # BUG FIX: was `get_body_com("puck"[:2])`, which sliced the
            # *string* to "pu"; the slice belongs on the returned position,
            # matching the usage elsewhere in this method.
            puck_position = self.get_body_com("puck")[:2]
            bounds = np.array([puck_position - self.goal_sampling_radius,
                               puck_position + self.goal_sampling_radius])
            bounds = np.clip(bounds, -1, 1)

            goal = np.random.uniform(
                low=bounds[0, :], high=bounds[1, :]
            )
            # (removed leftover ipdb.set_trace() debugging breakpoint that
            # halted every reset in this goal mode)

            qpos[self.QPOS_GOAL_INDS] = goal
        else:
            raise ValueError("Invalid goal mode")
        # TODO: remnants from rllab -> gym conversion
        # qacc = np.zeros(self.sim.data.qacc.shape[0])
        # ctrl = np.zeros(self.sim.data.ctrl.shape[0])
        # full_state = np.concatenate((qpos, qvel, qacc, ctrl))
        if self._first_step:
            # Force a fixed, known-good arm pose on the very first reset.
            qpos[self.QPOS_JOINT_INDS] = np.array([np.pi/4, -np.pi/4, -np.pi/2])
            self._first_step = False

        self.set_state(np.array(qpos), np.array(qvel))

        return self._get_obs()