in gym-compete/gym_compete/new_envs/kick_and_defend.py [0:0]
def goal_rewards(self, infos=None, agent_dones=None):
    """Advance the step counter and compute per-agent goal rewards.

    Exactly one of the following outcomes is applied per call, checked in
    this order:

    1. ``self.is_goal()`` is true: every ``'walker'`` agent gets
       ``+GOAL_REWARD`` and is marked as winner; every other agent gets
       ``-GOAL_REWARD``. The episode ends.
    2. The episode is otherwise over (step limit reached, ball past
       ``GOAL_X``, ball moving in ``-x`` with speed above 1, or every entry
       of ``agent_dones`` is truthy): every ``'walker'`` agent gets
       ``-GOAL_REWARD``; every other agent gets ``+GOAL_REWARD``, is marked
       as winner, and may earn two additional ``0.5 * GOAL_REWARD`` bonuses
       (ball contact, standing).
    3. A ``'blocker'`` agent is more than 2.5 away from ``GOAL_X`` along x:
       "Keeper foul!" is printed, every ``'blocker'`` agent gets
       ``-GOAL_REWARD`` and the episode ends.
    4. Otherwise the absolute x-distance between ball and goal is
       subtracted from ``infos[i]['reward_move']`` for ``'walker'`` agents
       and added to it for all other agents; ``goal_rews`` stays zero.

    Side effects: increments ``self._elapsed_steps``, may set
    ``self.keeper_touched_ball`` to ``True``, and mutates the dicts in
    ``infos`` in place.

    Args:
        infos: Sequence of per-agent dicts, indexed like ``self.agents``.
            Receives ``'winner'`` flags and has its existing
            ``'reward_move'`` entry adjusted in outcome 4. When ``None``,
            all info bookkeeping is skipped.
        agent_dones: Iterable of per-agent done flags. When ``None`` it is
            treated as "not all agents are done".

    Returns:
        Tuple ``(goal_rews, done)`` where ``goal_rews`` is a list of
        ``self.n_agents`` floats and ``done`` is truthy when the episode
        should terminate.
    """
    self._elapsed_steps += 1
    goal_rews = [0. for _ in range(self.n_agents)]

    ball_xyz = self.get_ball_qpos()[:3]
    # Episode is over when the step limit is hit or the ball has crossed
    # the goal line on whichever side of the origin the goal sits.
    done = (
        self._past_limit()
        or (self.GOAL_X > 0 and ball_xyz[0] > self.GOAL_X)
        or (self.GOAL_X < 0 and ball_xyz[0] < self.GOAL_X)
    )

    ball_vel = self.get_ball_qvel()[:3]
    # A ball travelling in -x faster than speed 1 ends the episode (the
    # original debug output labelled this "Keeper stopped ball").
    # NOTE(review): this check ignores the sign of GOAL_X - confirm it is
    # intended for goals placed at negative x as well.
    if ball_vel[0] < 0 and np.linalg.norm(ball_vel) > 1:
        done = True

    # The contact flag is sticky: once set it is never cleared here.
    ball_contacts = self.get_ball_contacts(self.blocker_id)
    if len(ball_contacts) > 0:
        self.keeper_touched_ball = True

    if self.is_goal():
        for i in range(self.n_agents):
            if self.agents[i].team == 'walker':
                goal_rews[i] += self.GOAL_REWARD
                if infos is not None:
                    infos[i]['winner'] = True
            else:
                goal_rews[i] -= self.GOAL_REWARD
        done = True
    elif done or (agent_dones is not None and all(agent_dones)):
        for i in range(self.n_agents):
            agent = self.agents[i]
            if agent.team == 'walker':
                goal_rews[i] -= self.GOAL_REWARD
            else:
                goal_rews[i] += self.GOAL_REWARD
                if infos is not None:
                    infos[i]['winner'] = True
                if self.keeper_touched_ball:
                    # ball contact bonus
                    goal_rews[i] += 0.5 * self.GOAL_REWARD
                if agent.get_qpos()[2] > 0.8:
                    # standing bonus
                    goal_rews[i] += 0.5 * self.GOAL_REWARD
    else:
        keeper_penalty = False
        for i in range(self.n_agents):
            agent = self.agents[i]
            if (agent.team == 'blocker'
                    and np.abs(self.GOAL_X - agent.get_qpos()[0]) > 2.5):
                keeper_penalty = True
                print("Keeper foul!")
                break

        if keeper_penalty:
            done = True
            for i in range(self.n_agents):
                if self.agents[i].team == 'blocker':
                    goal_rews[i] -= self.GOAL_REWARD
        elif infos is not None:
            # Dense shaping goes into infos['reward_move'], not goal_rews.
            # float() replaces np.asscalar, which was removed in NumPy 1.23.
            ball_goal_dist = float(np.abs(ball_xyz[0] - self.GOAL_X))
            for i in range(self.n_agents):
                if self.agents[i].team == 'walker':
                    infos[i]['reward_move'] -= ball_goal_dist
                else:
                    infos[i]['reward_move'] += ball_goal_dist

    return goal_rews, done