in gym-compete/gym_compete/new_envs/agents/humanoid_kicker.py [0:0]
def after_step(self, action):
    """Compute this agent's reward after a step, penalising distance to the ball.

    The action is clipped to the bounds of ``self.action_space`` and passed
    to the parent class's ``after_step``. The scalar reward returned by the
    parent is discarded; the reward is rebuilt here from the components the
    parent stores in its info dict, minus the distance between the first
    two entries of this agent's qpos and the first two entries of the
    ball's qpos (presumably the x/y positions -- confirm against the model).

    Args:
        action: Action applied on this step; clipped before use.

    Returns:
        A ``(reward, done, rinfo)`` tuple. ``done`` comes unchanged from
        the parent. ``rinfo`` is the parent's info dict with two keys
        added: ``'reward_goal_dist'`` (the agent-to-ball distance as a
        Python float) and ``'reward_move'`` (the final reward).

    Raises:
        AssertionError: If the computed reward is not finite.
    """
    action = np.clip(action, self.action_space.low, self.action_space.high)
    # Only `done` and the per-term breakdown in `rinfo` are taken from the
    # parent; its aggregate reward is intentionally ignored.
    _, done, rinfo = super(HumanoidKicker, self).after_step(action)

    ball_xy = self.get_ball_qpos()[:2]
    my_xy = self.get_qpos()[:2]
    ball_dist = np.sqrt(np.sum((my_xy - ball_xy) ** 2))
    # np.asscalar was deprecated in NumPy 1.16 and removed in 1.23;
    # float() performs the same NumPy-scalar -> Python-float conversion.
    rinfo['reward_goal_dist'] = float(ball_dist)

    reward = (
        rinfo['reward_forward']
        - rinfo['reward_ctrl']
        - rinfo['reward_contact']
        + rinfo['reward_survive']
        - rinfo['reward_goal_dist']
    )
    rinfo['reward_move'] = reward
    assert np.isfinite(reward), (rinfo, action)
    return reward, done, rinfo