gym-compete/gym_compete/new_envs/you_shall_not_pass.py (75 lines of code) (raw):

from .multi_agent_env import MultiAgentEnv
import numpy as np


class HumansBlockingEnv(MultiAgentEnv):
    """Walker-versus-blocker game ("you shall not pass").

    Agents belong to the 'walker' or the 'blocker' team. Walkers try to
    reach the goal at the far end; blockers try to stop them.

    Terminal rewards, as implemented by ``goal_rewards``:
        Some walker reaches the goal (episode ends):
            each walker that touched down: +GOAL_REWARD, flagged winner
            walkers that did not touch down: 0
            every non-walker agent: -GOAL_REWARD
        No walker has reached the goal:
            all walkers fallen:
                each standing blocker: +GOAL_REWARD, flagged winner
                each fallen blocker: 0
                each walker: -GOAL_REWARD
            step limit reached with a walker still up:
                each walker: -GOAL_REWARD
                each blocker: 0, flagged winner

    NOTE: an agent counts as fallen when its z coordinate is not above 0.3.
    """

    def __init__(self, max_episode_steps=500, **kwargs):
        """Set up the environment.

        Args:
            max_episode_steps: number of ``goal_rewards`` calls after which
                the episode is reported as done.
            **kwargs: forwarded unchanged to ``MultiAgentEnv.__init__``.
        """
        super(HumansBlockingEnv, self).__init__(**kwargs)
        self._max_episode_steps = max_episode_steps
        self._elapsed_steps = 0
        self.GOAL_REWARD = 1000

    def _past_limit(self):
        """Return True once the elapsed step count has reached the limit."""
        return bool(self._max_episode_steps <= self._elapsed_steps)

    def _is_standing(self, agent_id, limit=0.3):
        """Return True if the agent's height (qpos[2]) is above ``limit``."""
        height = self.agents[agent_id].get_qpos()[2]
        return bool(height > limit)

    def _get_done(self, dones, game_done):
        """Give every agent the game-level done flag.

        The per-agent ``dones`` argument is ignored.
        """
        return tuple(game_done for _ in range(self.n_agents))

    def goal_rewards(self, infos=None, agent_dones=None):
        """Advance the step counter and compute goal rewards.

        Args:
            infos: per-agent info dicts, indexed by agent id. Read for
                'reward_goal_dist', updated in place for 'reward_move' and
                'winner'. Although the default is None, the method indexes
                into it whenever it has something to record.
            agent_dones: unused.

        Returns:
            A ``(goal_rews, done)`` pair: a list with one reward per agent
            and a flag telling whether the episode is over.
        """
        self._elapsed_steps += 1

        # Agents are looked up by id rather than iterated, because the
        # container type of self.agents is not visible from this class.
        agent_ids = range(self.n_agents)
        rewards = [0.0 for _ in agent_ids]
        touchdowns = [self.agents[idx].reached_goal() for idx in agent_ids]
        walker_down = [
            not self._is_standing(idx)
            for idx in agent_ids
            if self.agents[idx].team == 'walker'
        ]
        all_walkers_down = all(walker_down)
        done = self._past_limit() or all_walkers_down

        if any(touchdowns):
            # A walker reached the goal: the episode ends immediately.
            for idx in agent_ids:
                if self.agents[idx].team != 'walker':
                    rewards[idx] -= self.GOAL_REWARD
                elif touchdowns[idx]:
                    rewards[idx] += self.GOAL_REWARD
                    infos[idx]['winner'] = True
            return rewards, True

        # Nobody has touched down (yet).
        for idx in agent_ids:
            team = self.agents[idx].team
            if team == 'blocker':
                # Shaping term: the blocker's movement reward is reduced by
                # the opponent's goal-distance reward.
                # NOTE(review): `1 - idx` presumably assumes exactly two
                # agents (ids 0 and 1) - confirm before using more.
                opponent_goal_dist = infos[1 - idx]['reward_goal_dist']
                infos[idx]['reward_move'] += -opponent_goal_dist
                if all_walkers_down:
                    # Only a blocker that is still standing is rewarded.
                    if self._is_standing(idx):
                        rewards[idx] += self.GOAL_REWARD
                        infos[idx]['winner'] = True
                elif done:
                    # Step limit hit while a walker is still up: the
                    # blocker wins without a goal reward.
                    infos[idx]['winner'] = True
            elif done:
                if team == 'walker':
                    rewards[idx] -= self.GOAL_REWARD
                else:
                    infos[idx]['winner'] = True
        return rewards, done

    def _reset(self):
        """Zero the step counter and delegate to the parent ``_reset``."""
        self._elapsed_steps = 0
        return super(HumansBlockingEnv, self)._reset()

    def reset(self, margins=None):
        """Reset the environment and optionally set per-agent margins.

        Args:
            margins: optional sequence indexed by agent id. When truthy,
                each entry is passed to the matching agent's ``set_margin``.

        Returns:
            The observation produced by ``_reset``.
        """
        observation = self._reset()
        if not margins:
            return observation
        for idx in range(self.n_agents):
            self.agents[idx].set_margin(margins[idx])
        return observation