def goal_rewards()

in gym-compete/gym_compete/new_envs/kick_and_defend.py [0:0]


    def goal_rewards(self, infos=None, agent_dones=None):
        """Compute this step's goal rewards and whether the episode is over.

        Increments ``self._elapsed_steps`` and latches
        ``self.keeper_touched_ball`` once ``get_ball_contacts`` reports any
        contact for ``self.blocker_id``.

        Outcomes, checked in this order:

        1. ``self.is_goal()`` is true: every ``'walker'`` agent gets
           ``+GOAL_REWARD`` and ``infos[i]['winner'] = True``; every other
           agent gets ``-GOAL_REWARD``.
        2. The episode ended without a goal (step limit, ball past the goal
           line's x coordinate, ball moving in -x with speed above 1, or all
           entries of ``agent_dones`` truthy): walkers get ``-GOAL_REWARD``;
           every other agent gets ``+GOAL_REWARD``, is marked winner, and
           earns ``0.5 * GOAL_REWARD`` each for having touched the ball and
           for having ``get_qpos()[2] > 0.8``.
        3. Otherwise, if any ``'blocker'`` agent is more than 2.5 away from
           ``GOAL_X`` along x, the episode ends and every blocker gets
           ``-GOAL_REWARD``.
        4. Otherwise the episode continues and the ball's x-distance to
           ``GOAL_X`` is subtracted from walkers' ``infos[i]['reward_move']``
           and added to everyone else's.

        Args:
            infos: Per-agent info dicts, mutated in place. Caller-supplied
                dicts must already contain ``'reward_move'`` for case 4.
                If ``None``, throwaway dicts are used and the info updates
                are discarded.
            agent_dones: Per-agent done flags. ``None`` is treated as
                "not all agents are done".

        Returns:
            tuple: ``(goal_rews, done)`` where ``goal_rews`` is a list of
            ``self.n_agents`` floats and ``done`` is truthy when the episode
            should end.
        """
        self._elapsed_steps += 1

        if infos is None:
            # The signature allows omitting ``infos``; use scratch dicts so
            # the bookkeeping below does not fail on ``None``.
            infos = [{'reward_move': 0.} for _ in range(self.n_agents)]

        goal_rews = [0. for _ in range(self.n_agents)]

        ball_xyz = self.get_ball_qpos()[:3]
        done = (
            self._past_limit()
            or (self.GOAL_X > 0 and ball_xyz[0] > self.GOAL_X)
            or (self.GOAL_X < 0 and ball_xyz[0] < self.GOAL_X)
        )

        ball_vel = self.get_ball_qvel()[:3]
        # A ball travelling in -x faster than 1 is treated as stopped by the
        # keeper and ends the episode.
        # NOTE(review): this check does not depend on the sign of GOAL_X,
        # unlike the goal-line check above -- confirm that is intended.
        if ball_vel[0] < 0 and np.linalg.norm(ball_vel) > 1:
            done = True

        # Latch: once the keeper has touched the ball the flag stays set.
        if len(self.get_ball_contacts(self.blocker_id)) > 0:
            self.keeper_touched_ball = True

        if self.is_goal():
            for i in range(self.n_agents):
                if self.agents[i].team == 'walker':
                    goal_rews[i] += self.GOAL_REWARD
                    infos[i]['winner'] = True
                else:
                    goal_rews[i] -= self.GOAL_REWARD
            done = True
        elif done or (agent_dones is not None and all(agent_dones)):
            # NOTE(review): when only ``all(agent_dones)`` is true, ``done``
            # is returned unchanged (as in the original) -- presumably the
            # caller combines it with ``agent_dones``; verify.
            for i in range(self.n_agents):
                agent = self.agents[i]
                if agent.team == 'walker':
                    goal_rews[i] -= self.GOAL_REWARD
                else:
                    goal_rews[i] += self.GOAL_REWARD
                    infos[i]['winner'] = True
                    if self.keeper_touched_ball:
                        # ball contact bonus
                        goal_rews[i] += 0.5 * self.GOAL_REWARD
                    if agent.get_qpos()[2] > 0.8:
                        # standing bonus
                        goal_rews[i] += 0.5 * self.GOAL_REWARD
        else:
            keeper_penalty = False
            for i in range(self.n_agents):
                agent = self.agents[i]
                if (agent.team == 'blocker'
                        and np.abs(self.GOAL_X - agent.get_qpos()[0]) > 2.5):
                    keeper_penalty = True
                    print("Keeper foul!")
                    break

            if keeper_penalty:
                done = True
                for i in range(self.n_agents):
                    if self.agents[i].team == 'blocker':
                        goal_rews[i] -= self.GOAL_REWARD
            else:
                # ``float`` replaces ``np.asscalar``, which was removed in
                # NumPy 1.23; both yield a plain Python float here.
                ball_to_goal = float(np.abs(ball_xyz[0] - self.GOAL_X))
                for i in range(self.n_agents):
                    if self.agents[i].team == 'walker':
                        infos[i]['reward_move'] -= ball_to_goal
                    else:
                        infos[i]['reward_move'] += ball_to_goal
        return goal_rews, done