in MTRF/algorithms/softlearning/environments/gym/mujoco/pusher_2d.py [0:0]
def reset_model(self):
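    """Reset arm, puck, and goal state according to the configured
    reset mode and goal mode, and return the initial observation."""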
    if self._reset_mode == "free":
        qpos = self._last_qpos.copy()
        qvel = self.init_qvel.copy().squeeze()
    elif self._reset_mode == "random":  # random puck pos + random arm angles
        qvel = self.init_qvel.copy().squeeze()
        qvel[self.QPOS_PUCK_INDS] = 0
        qvel[self.QPOS_GOAL_INDS] = 0
        qpos = self.init_qpos.copy()
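        # Rejection-sample arm joint angles until the end effector
        # ("distal_4") lands inside the workspace bounds.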
        valid_arm_pos = False
        while not valid_arm_pos:
            qpos[self.QPOS_JOINT_INDS] = np.random.uniform(
                low=(-np.pi, -np.pi * 3 / 4, -np.pi / 2),
                high=(np.pi, np.pi * 3 / 4, np.pi / 2)
            )
            self.set_state(np.array(qpos), np.array(qvel))
            eef_pos = self.get_body_com("distal_4")[:2]
            if np.all(np.abs(eef_pos) <= 0.9):
                valid_arm_pos = True
        qpos[self.QPOS_PUCK_INDS] = np.random.uniform(
            low=(self._puck_initial_x_range[0],
                 self._puck_initial_y_range[0]),
            high=(self._puck_initial_x_range[1],
                  self._puck_initial_y_range[1])
        )
elif self._reset_mode == "random_puck": # just randomize puck pos
qpos = self.init_qpos.copy()
qpos[self.QPOS_PUCK_INDS] = np.random.uniform(
low=(self._puck_initial_x_range[0],
self._puck_initial_y_range[0]),
high=(self._puck_initial_x_range[1],
self._puck_initial_y_range[1])
)
qvel = self.init_qvel.copy().squeeze()
qvel[self.QPOS_PUCK_INDS] = 0
qvel[self.QPOS_GOAL_INDS] = 0
elif self._reset_mode == "distribution":
        num_init_states = self._init_states.shape[0]
        rand_index = np.random.randint(num_init_states)
        init_state = self._init_states[rand_index]
        qpos = self.init_qpos.copy()
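        # Joint angles are stored as (sin, cos) pairs in the observation;
        # recover the angle with arctan2(sin, cos).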
        qpos[self.QPOS_JOINT_INDS] = np.arctan2(
            init_state[self.OBS_JOINT_SIN_INDS],
            init_state[self.OBS_JOINT_COS_INDS]
        )
        qpos[self.QPOS_PUCK_INDS] = init_state[self.OBS_PUCK_INDS]
        qvel = self.init_qvel.copy().squeeze()
    else:
        raise ValueError(f"Unknown reset mode: {self._reset_mode!r}")
    if self._goal_mode == "random":
        if self._num_goals == 1:
            qpos[self.QPOS_GOAL_INDS] = self._goals[0]
        elif self._num_goals > 1:
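            # With multiple goals: either swap to a different goal once the
            # puck reaches the current one, or resample uniformly each reset.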
            if self._swap_goal_upon_completion:
                puck_position = self.get_body_com("puck")[:2]
                goal_position = self.get_body_com("goal")[:2]
                if np.linalg.norm(puck_position - goal_position) < 0.01:
                    other_goal_indices = [
                        i for i in range(self._num_goals)
                        if i != self._current_goal_index
                    ]
                    self._current_goal_index = np.random.choice(
                        other_goal_indices)
            else:
                self._current_goal_index = np.random.randint(self._num_goals)
            qpos[self.QPOS_GOAL_INDS] = self._goals[self._current_goal_index]
        else:
            qpos[self.QPOS_GOAL_INDS] = np.random.uniform(
                low=(self._goal_x_range[0],
                     self._goal_y_range[0]),
                high=(self._goal_x_range[1],
                      self._goal_y_range[1])
            )
    elif self._goal_mode == "curriculum-radius":
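        # Curriculum: sample the goal from a box centered on the puck whose
        # half-width grows every reset, clipped to the arena bounds.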
        self.goal_sampling_radius += self._goal_sampling_radius_increment
        puck_position = self.get_body_com("puck")[:2]
        bounds = np.array([puck_position - self.goal_sampling_radius,
                           puck_position + self.goal_sampling_radius])
        bounds = np.clip(bounds, -1, 1)
        goal = np.random.uniform(low=bounds[0, :], high=bounds[1, :])
        qpos[self.QPOS_GOAL_INDS] = goal
    else:
        raise ValueError(f"Unknown goal mode: {self._goal_mode!r}")
    # TODO: remnants from rllab -> gym conversion
    # qacc = np.zeros(self.sim.data.qacc.shape[0])
    # ctrl = np.zeros(self.sim.data.ctrl.shape[0])
    # full_state = np.concatenate((qpos, qvel, qacc, ctrl))
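    # On the very first reset, pin the arm to a fixed canonical pose.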
    if self._first_step:
        qpos[self.QPOS_JOINT_INDS] = np.array([np.pi / 4, -np.pi / 4, -np.pi / 2])
        self._first_step = False
    self.set_state(np.array(qpos), np.array(qvel))
    return self._get_obs()
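
For reference, the "random" branch above rejection-samples joint angles until the end effector lands inside the workspace. Below is a minimal self-contained sketch of that pattern; the planar forward kinematics and link lengths are hypothetical stand-ins for the MuJoCo model's "distal_4" body, not the environment's actual geometry.

import numpy as np

LINK_LENGTHS = (0.4, 0.4, 0.3)  # hypothetical; real lengths live in the MJCF model

def forward_kinematics_2d(angles, link_lengths=LINK_LENGTHS):
    # Planar FK stand-in: accumulate joint angles and sum link vectors.
    position, heading = np.zeros(2), 0.0
    for angle, length in zip(angles, link_lengths):
        heading += angle
        position += length * np.array([np.cos(heading), np.sin(heading)])
    return position

def sample_valid_arm_angles(bound=0.9, rng=np.random):
    # Rejection-sample joint angles until the end effector lies inside
    # the [-bound, bound]^2 workspace, mirroring the loop in reset_model.
    while True:
        angles = rng.uniform(low=(-np.pi, -np.pi * 3 / 4, -np.pi / 2),
                             high=(np.pi, np.pi * 3 / 4, np.pi / 2))
        if np.all(np.abs(forward_kinematics_2d(angles)) <= bound):
            return angles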