in gym-compete/gym_compete/new_envs/sumo.py [0:0]
def goal_rewards(self, infos=None, agent_dones=None):
    """Compute the per-agent goal reward for the current step.

    Increments ``self._elapsed_steps`` and then decides whether the episode
    ends, checking the conditions in this priority order:

    1. At least one agent has fallen (``self._is_fallen``).
    2. At least one agent is past the arena (``self._past_arena``).
    3. The step limit has been passed (``self._past_limit``).

    In cases 1 and 2 every agent for which the condition holds loses
    ``self.GOAL_REWARD``; every other agent gains ``self.GOAL_REWARD`` and
    is flagged as the winner, but only if ``self.agent_contacts`` is truthy.
    In case 3 every agent loses ``self.GOAL_REWARD``.

    Args:
        infos: Optional container indexable by agent index whose entries
            support item assignment. ``infos[j]['winner']`` is set to
            ``True`` for each winning agent. When ``None``, no winner flag
            is recorded.
        agent_dones: Not used by this method.

    Returns:
        A ``(goal_rews, done)`` tuple: the list of per-agent rewards and a
        truthy value when the episode should end.
    """
    self._elapsed_steps += 1
    agent_ids = range(self.n_agents)
    goal_rews = [0. for _ in agent_ids]

    # Query the helpers in a fixed order: fallen, time limit, arena, contacts.
    fallen = [self._is_fallen(i) for i in agent_ids]
    timeup = self._past_limit()
    past_arena = [self._past_arena(i) for i in agent_ids]
    done = False

    # The contact flag is only ever switched on here, so it stays set on
    # later steps once contact has been seen.
    # NOTE(review): presumably it is initialised/reset elsewhere - confirm.
    # len() is used rather than truthiness in case the collection is an
    # array-like object whose truth value is ambiguous.
    if len(self.get_agent_contacts()) > 0:
        self.agent_contacts = True

    # Falling takes precedence over leaving the arena; both are scored the
    # same way and differ only in the flag list and the printed label.
    if any(fallen):
        lost, label = fallen, 'fallen'
    elif any(past_arena):
        lost, label = past_arena, 'past arena'
    else:
        lost, label = None, None

    if lost is not None:
        done = True
        for j in agent_ids:
            if lost[j]:
                print('Agent', j, label)
                goal_rews[j] -= self.GOAL_REWARD
            elif self.agent_contacts:
                # A win is only credited once the agents have touched.
                goal_rews[j] += self.GOAL_REWARD
                # infos defaults to None; skip the flag instead of raising.
                if infos is not None:
                    infos[j]['winner'] = True
    elif timeup:
        # Running out of time penalises every agent.
        for j in agent_ids:
            goal_rews[j] -= self.GOAL_REWARD

    done = timeup or done
    return goal_rews, done