multiagent/scenarios/simple_reference.py

import numpy as np
from multiagent.core import World, Agent, Landmark
from multiagent.scenario import BaseScenario


class Scenario(BaseScenario):
    def make_world(self):
        world = World()
        # set any world properties first
        world.dim_c = 10
        world.collaborative = True  # whether agents share rewards
        # add agents
        world.agents = [Agent() for i in range(2)]
        for i, agent in enumerate(world.agents):
            agent.name = 'agent %d' % i
            agent.collide = False
        # add landmarks
        world.landmarks = [Landmark() for i in range(3)]
        for i, landmark in enumerate(world.landmarks):
            landmark.name = 'landmark %d' % i
            landmark.collide = False
            landmark.movable = False
        # make initial conditions
        self.reset_world(world)
        return world

    def reset_world(self, world):
        # assign goals to agents
        for agent in world.agents:
            agent.goal_a = None
            agent.goal_b = None
        # want the other agent to go to the goal landmark
        world.agents[0].goal_a = world.agents[1]
        world.agents[0].goal_b = np.random.choice(world.landmarks)
        world.agents[1].goal_a = world.agents[0]
        world.agents[1].goal_b = np.random.choice(world.landmarks)
        # fixed grey base color for agents
        for i, agent in enumerate(world.agents):
            agent.color = np.array([0.25, 0.25, 0.25])
        # fixed colors for landmarks
        world.landmarks[0].color = np.array([0.75, 0.25, 0.25])
        world.landmarks[1].color = np.array([0.25, 0.75, 0.25])
        world.landmarks[2].color = np.array([0.25, 0.25, 0.75])
        # special colors for goals: the partner agent takes on the goal landmark's color
        world.agents[0].goal_a.color = world.agents[0].goal_b.color
        world.agents[1].goal_a.color = world.agents[1].goal_b.color
        # set random initial states
        for agent in world.agents:
            agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
            agent.state.p_vel = np.zeros(world.dim_p)
            agent.state.c = np.zeros(world.dim_c)
        for i, landmark in enumerate(world.landmarks):
            landmark.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
            landmark.state.p_vel = np.zeros(world.dim_p)

    def reward(self, agent, world):
        # negative squared distance between the partner agent (goal_a)
        # and the goal landmark (goal_b)
        if agent.goal_a is None or agent.goal_b is None:
            return 0.0
        dist2 = np.sum(np.square(agent.goal_a.state.p_pos - agent.goal_b.state.p_pos))
        return -dist2

    def observation(self, agent, world):
        # goal color (goal_color[0] stays zero; only the goal landmark color is used)
        goal_color = [np.zeros(world.dim_color), np.zeros(world.dim_color)]
        if agent.goal_b is not None:
            goal_color[1] = agent.goal_b.color
        # get positions of all entities in this agent's reference frame
        entity_pos = []
        for entity in world.landmarks:
            entity_pos.append(entity.state.p_pos - agent.state.p_pos)
        # entity colors (computed but not included in the observation below)
        entity_color = []
        for entity in world.landmarks:
            entity_color.append(entity.color)
        # communication of all other agents
        comm = []
        for other in world.agents:
            if other is agent:
                continue
            comm.append(other.state.c)
        return np.concatenate([agent.state.p_vel] + entity_pos + [goal_color[1]] + comm)
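

# --- Usage sketch (not part of the original file) ---
# A minimal example of how a scenario like this is typically wired into an
# environment, assuming the surrounding multiagent-particle-envs package and
# its MultiAgentEnv class, which accepts the scenario callbacks as shown.
if __name__ == '__main__':
    from multiagent.environment import MultiAgentEnv

    scenario = Scenario()
    world = scenario.make_world()
    env = MultiAgentEnv(world,
                        reset_callback=scenario.reset_world,
                        reward_callback=scenario.reward,
                        observation_callback=scenario.observation)
    obs_n = env.reset()
    # per-agent observation: self velocity (2) + 3 relative landmark
    # positions (6) + goal landmark color (3) + other agent's comm (10) = 21 dims
    print([obs.shape for obs in obs_n])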