in multiagent/environment.py [0:0]
def __init__(self, world, reset_callback=None, reward_callback=None,
observation_callback=None, info_callback=None,
done_callback=None, shared_viewer=True):
self.world = world
self.agents = self.world.policy_agents
# set required vectorized gym env property
self.n = len(world.policy_agents)
# scenario callbacks
self.reset_callback = reset_callback
self.reward_callback = reward_callback
self.observation_callback = observation_callback
self.info_callback = info_callback
self.done_callback = done_callback
# environment parameters
self.discrete_action_space = True
# if true, action is a number 0...N, otherwise action is a one-hot N-dimensional vector
self.discrete_action_input = False
# if true, even the action is continuous, action will be performed discretely
self.force_discrete_action = world.discrete_action if hasattr(world, 'discrete_action') else False
# if true, every agent has the same reward
self.shared_reward = world.collaborative if hasattr(world, 'collaborative') else False
self.time = 0
# configure spaces
self.action_space = []
self.observation_space = []
for agent in self.agents:
total_action_space = []
# physical action space
if self.discrete_action_space:
u_action_space = spaces.Discrete(world.dim_p * 2 + 1)
else:
u_action_space = spaces.Box(low=-agent.u_range, high=+agent.u_range, shape=(world.dim_p,), dtype=np.float32)
if agent.movable:
total_action_space.append(u_action_space)
# communication action space
if self.discrete_action_space:
c_action_space = spaces.Discrete(world.dim_c)
else:
c_action_space = spaces.Box(low=0.0, high=1.0, shape=(world.dim_c,), dtype=np.float32)
if not agent.silent:
total_action_space.append(c_action_space)
# total action space
if len(total_action_space) > 1:
# all action spaces are discrete, so simplify to MultiDiscrete action space
if all([isinstance(act_space, spaces.Discrete) for act_space in total_action_space]):
act_space = MultiDiscrete([[0, act_space.n - 1] for act_space in total_action_space])
else:
act_space = spaces.Tuple(total_action_space)
self.action_space.append(act_space)
else:
self.action_space.append(total_action_space[0])
# observation space
obs_dim = len(observation_callback(agent, self.world))
self.observation_space.append(spaces.Box(low=-np.inf, high=+np.inf, shape=(obs_dim,), dtype=np.float32))
agent.action.c = np.zeros(self.world.dim_c)
# rendering
self.shared_viewer = shared_viewer
if self.shared_viewer:
self.viewers = [None]
else:
self.viewers = [None] * self.n
self._reset_render()