in gym/envs/mujoco/ant.py [0:0]
def step(self, a):
xposbefore = self.get_body_com("torso")[0]
self.do_simulation(a, self.frame_skip)
xposafter = self.get_body_com("torso")[0]
forward_reward = (xposafter - xposbefore) / self.dt
ctrl_cost = 0.5 * np.square(a).sum()
contact_cost = (
0.5 * 1e-3 * np.sum(np.square(np.clip(self.sim.data.cfrc_ext, -1, 1)))
)
survive_reward = 1.0
reward = forward_reward - ctrl_cost - contact_cost + survive_reward
state = self.state_vector()
not_terminated = (
np.isfinite(state).all() and state[2] >= 0.2 and state[2] <= 1.0
)
terminated = not not_terminated
ob = self._get_obs()
if self.render_mode == "human":
self.render()
return (
ob,
reward,
terminated,
False,
dict(
reward_forward=forward_reward,
reward_ctrl=-ctrl_cost,
reward_contact=-contact_cost,
reward_survive=survive_reward,
),
)