in gym/gym/envs/mujoco/ant_movement.py [0:0]
def _step(self, a):
    # realgoal[0] in {0, 1} means progress is measured along the x axis;
    # otherwise the goal lies along the y axis, so the two coordinates swap roles.
    axis = 0 if self.realgoal[0] in (0, 1) else 1
    xposbefore = self.data.qpos[axis, 0]
    self.do_simulation(a, self.frame_skip)
    xposafter = self.data.qpos[axis, 0]
    yposafter = self.data.qpos[1 - axis, 0]

    # Reward displacement along the goal axis per unit of simulated time.
    forward_reward = (xposafter - xposbefore) / self.dt
    # if self.realgoal[0] == 1 or self.realgoal[0] == 3:
    #     forward_reward = forward_reward * -1
    # Penalize drift away from the goal axis and large control torques.
    side_reward = np.abs(yposafter) * 0.5
    ctrl_cost = 0.1 * np.square(a).sum()
    reward = forward_reward - ctrl_cost - side_reward
    done = False
    ob = self._get_obs()
    return ob, reward, done, dict(forward_reward=forward_reward,
                                  ctrl_cost=ctrl_cost,
                                  side_reward=side_reward)
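
To make the reward shaping concrete, here is a minimal standalone sketch that recomputes the three terms outside the environment. The timestep, positions, and 8-dimensional action below are made-up illustration values, not taken from the actual simulator state:

```python
import numpy as np

# Hypothetical values standing in for self.dt, qpos readings, and the action.
dt = 0.05
xposbefore, xposafter = 0.00, 0.12   # displacement along the goal axis
yposafter = 0.30                      # drift along the perpendicular axis
a = np.array([0.2, -0.1, 0.0, 0.3, -0.2, 0.1, 0.0, 0.2])  # ant torques

forward_reward = (xposafter - xposbefore) / dt        # 2.4
side_reward = np.abs(yposafter) * 0.5                 # 0.15 penalty
ctrl_cost = 0.1 * np.square(a).sum()                  # 0.023 penalty
reward = forward_reward - ctrl_cost - side_reward     # ~2.227
print(forward_reward, side_reward, ctrl_cost, reward)
```

The forward term dominates when the agent moves along the goal axis, while the side and control penalties stay small unless the ant drifts sideways or applies large torques.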