gym-compete/gym_compete/new_envs/agents/humanoid.py
def after_step(self, action):
    # Forward progress: finite-difference velocity of the center of
    # mass over one control step, scaled as in Gym's Humanoid-v1.
    pos_after = mass_center(self.get_body_mass(), self.get_xipos())
    forward_reward = 0.25 * (pos_after - self._pos_before) / self.env.model.opt.timestep
    if self.move_left:
        # Left-moving agents are rewarded for negative x-velocity.
        forward_reward *= -1
    # Quadratic penalty on actuator torques.
    ctrl_cost = 0.1 * np.square(action).sum()
    # Penalty on external contact forces, clipped to damp spikes.
    cfrc_ext = self.get_cfrc_ext()
    contact_cost = 0.5e-6 * np.square(cfrc_ext).sum()
    contact_cost = min(contact_cost, 10)
    qpos = self.get_qpos()
    # Alive bonus while the torso height stays at or above 1.0 m.
    agent_standing = qpos[2] >= 1.0
    survive = 5.0 if agent_standing else -5.0
    reward = forward_reward - ctrl_cost - contact_cost + survive
    # Dense shaping: negative x-distance of the torso to the goal line.
    # qpos[0].item() replaces np.asscalar, which was removed from NumPy.
    reward_goal = -np.abs(qpos[0].item() - self.GOAL)
    reward += reward_goal
    reward_info = dict()
    reward_info['reward_forward'] = forward_reward
    reward_info['reward_ctrl'] = ctrl_cost
    reward_info['reward_contact'] = contact_cost
    reward_info['reward_survive'] = survive
    if self.team == 'walker':
        reward_info['reward_goal_dist'] = reward_goal
    reward_info['reward_move'] = reward
    # done = not agent_standing
    # Termination uses a looser 0.8 m threshold than the 1.0 m
    # standing check above, so the agent keeps collecting the -5.0
    # penalty while stumbling before the episode ends.
    done = qpos[2] < 0.8
    return reward, done, reward_info
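
# `mass_center` is used above but not defined in this excerpt. A minimal
# sketch, assuming the standard Gym Humanoid-v1 convention of a
# mass-weighted average over body frame positions, with `mass` shaped
# (nbody, 1) and `xpos` shaped (nbody, 3):

import numpy as np

def mass_center(mass, xpos):
    # Mass-weighted average of body positions; [0] keeps only the
    # x-coordinate used by the forward-progress term in after_step.
    return (np.sum(mass * xpos, 0) / np.sum(mass))[0]

# The finite-difference velocity also assumes self._pos_before was
# cached before the simulator advanced (e.g. in a matching before-step
# hook); that update is not shown in this excerpt.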