# in mae_envs/envs/hide_and_seek.py [0:0]
def make_env(n_substeps=15, horizon=80, deterministic_mode=False,
             floor_size=6.0, grid_size=30, door_size=2,
             n_hiders=1, n_seekers=1, max_n_agents=None,
             n_boxes=2, n_ramps=1, n_elongated_boxes=0,
             rand_num_elongated_boxes=False, n_min_boxes=None,
             box_size=0.5, boxid_obs=False, box_only_z_rot=True,
             rew_type='joint_zero_sum',
             lock_box=True, grab_box=True, lock_ramp=True,
             lock_type='any_lock_specific',
             lock_grab_radius=0.25, lock_out_of_vision=True, grab_exclusive=False,
             grab_out_of_vision=False, grab_selective=False,
             box_floor_friction=0.2, other_friction=0.01, gravity=[0, 0, -50],
             action_lims=(-0.9, 0.9), polar_obs=True,
             scenario='quadrant', quadrant_game_hider_uniform_placement=False,
             p_door_dropout=0.0,
             n_rooms=4, random_room_number=True, prob_outside_walls=1.0,
             n_lidar_per_agent=0, visualize_lidar=False, compress_lidar_scale=None,
             hiders_together_radius=None, seekers_together_radius=None,
             prep_fraction=0.4, prep_obs=False,
             team_size_obs=False,
             restrict_rect=None, penalize_objects_out=False,
             n_food=0, food_radius=None, food_respawn_time=None, max_food_health=1,
             food_together_radius=None, food_rew_type='selfish', eat_when_caught=False,
             food_reward_scale=1.0, food_normal_centered=False, food_box_centered=False,
             n_food_cluster=1):
    """Assemble a hide-and-seek environment.

    Creates a ``Base`` environment, registers the world modules (walls,
    agents, boxes, ramps, lidar sites, food, ...), resets it once, and then
    applies the wrappers one after another. Every wrapper wraps the result
    of the previous one, so the statement order below fixes the nesting.

    Agents are ordered hiders first: indices ``[0, n_hiders)`` are hiders and
    the next ``n_seekers`` indices are seekers.

    Args:
        n_substeps, horizon, floor_size, grid_size, action_lims,
        deterministic_mode: Passed to ``Base``.
        door_size: Passed to the wall module of the chosen scenario.
        n_hiders, n_seekers: Team sizes.
        max_n_agents: If not None, an extra ``SpoofEntityWrapper`` is applied
            to the agent observations with this entity count.
        n_boxes: Passed to ``Boxes``; ``np.max(n_boxes)`` is used wherever a
            single box count is needed (presumably so that a range of counts
            can be given -- confirm against ``Boxes``).
        n_ramps: Number of ramps.
        n_elongated_boxes: Passed to ``Boxes``; ramps are created with
            ``pad_ramp_size=True`` when its maximum is positive.
        rand_num_elongated_boxes, n_min_boxes, food_normal_centered:
            Accepted but not read anywhere in this function.
        box_size: Divisor turning ``lock_grab_radius`` into the radius
            multiplier handed to the lock and grab wrappers.
        boxid_obs, box_only_z_rot: Passed to ``Boxes``.
        rew_type: Passed to ``HideAndSeekRewardWrapper``.
        lock_box, lock_ramp, grab_box: Enable the box lock, ramp lock and
            grab wrappers respectively.
        lock_type: Passed to ``LockObjWrapper``.
        lock_grab_radius: See ``box_size``.
        lock_out_of_vision: If False, the lock wrappers receive the matching
            visibility mask key as ``agent_allowed_to_lock_keys``.
        grab_exclusive: Passed to ``GrabObjWrapper``.
        grab_out_of_vision: If False (and ``grab_box``), ``action_pull`` is
            masked with the box (and ramp) visibility masks.
        grab_selective: If False (and ``grab_box``), ``GrabClosestWrapper``
            is applied.
        box_floor_friction: Friction for ``Boxes``; if not None it is also
            used for ``FloorAttributes``.
        other_friction: Friction for agents, ramps and the quadrant walls.
        gravity: Passed to ``WorldConstants``.
        polar_obs: Passed to ``Agents``, ``Boxes`` and ``Ramps``.
        scenario: ``'randomwalls'`` or ``'quadrant'``.
        quadrant_game_hider_uniform_placement: In the quadrant scenario,
            place hiders with ``uniform_placement`` instead of
            ``quadrant_placement``.
        p_door_dropout: Passed to ``WallScenarios`` (quadrant only).
        n_rooms, random_room_number, prob_outside_walls: Passed to
            ``RandomWalls`` (randomwalls only).
        n_lidar_per_agent, visualize_lidar, compress_lidar_scale: Lidar
            settings; lidar sites are only added when visualising.
        hiders_together_radius, seekers_together_radius: Randomwalls only.
            When set, the radius is converted to grid cells, stored in
            ``env.metadata`` and all but the first team member are placed
            with ``close_to_other_object_placement`` relative to the first.
        prep_fraction: Passed to ``PreparationPhase``.
        prep_obs: If True, ``TrackStatWrapper`` is applied and, when food is
            present, eating is masked via ``MaskPrepPhaseAction``.
        team_size_obs: If True, ``'team_size'`` is added to the self keys.
        restrict_rect, penalize_objects_out: When ``restrict_rect`` is not
            None, both are passed to ``RestrictAgentsRect``.
        n_food, food_radius, food_respawn_time, max_food_health,
        food_rew_type, food_reward_scale: Food settings; a ``None`` radius or
            respawn time is replaced by ``np.inf``.
        food_together_radius: When set, food is placed in clusters around
            ``n_food_cluster`` anchor items.
        eat_when_caught: If False, ``MaskUnseenAction`` is applied to
            ``action_eat_food``.
        food_box_centered: Outside the quadrant scenario, place the anchor
            food with ``uniform_placement_middle(0.25)``.
        n_food_cluster: Number of food clusters; must divide ``n_food`` when
            ``food_together_radius`` is set.

    Returns:
        The fully wrapped environment.

    Raises:
        ValueError: If ``scenario`` is not one of the supported names.
        AssertionError: If food clustering is requested and ``n_food`` is not
            a multiple of ``n_food_cluster``.
    """
    # Lock and grab share the same reach, expressed relative to the box size.
    grab_radius_multiplier = lock_radius_multiplier = lock_grab_radius / box_size
    n_agents = n_hiders + n_seekers

    env = Base(n_agents=n_agents, n_substeps=n_substeps, horizon=horizon,
               floor_size=floor_size, grid_size=grid_size,
               action_lims=action_lims,
               deterministic_mode=deterministic_mode)

    # ------------------------------------------------------------------
    # Walls and placement functions, depending on the scenario.
    # ------------------------------------------------------------------
    if scenario == 'randomwalls':
        walls = RandomWalls(
            grid_size=grid_size, num_rooms=n_rooms,
            random_room_number=random_room_number, min_room_size=6,
            door_size=door_size,
            prob_outside_walls=prob_outside_walls, gen_door_obs=False)
        env.add_module(walls)
        box_placement_fn = ramp_placement_fn = uniform_placement
        cell_size = floor_size / grid_size

        # Hiders: first one anywhere, the rest optionally near the first.
        if hiders_together_radius is None:
            agent_placement_fn = [uniform_placement] * n_hiders
        else:
            env.metadata['hiders_together_radius'] = \
                np.ceil(hiders_together_radius / cell_size).astype(int)
            near_first_hider = close_to_other_object_placement(
                "agent", 0, "hiders_together_radius")
            agent_placement_fn = [uniform_placement] + [near_first_hider] * (n_hiders - 1)

        # Seekers: same scheme, anchored on the first seeker (index n_hiders).
        if seekers_together_radius is None:
            agent_placement_fn.extend([uniform_placement] * (n_seekers))
        else:
            env.metadata['seekers_together_radius'] = \
                np.ceil(seekers_together_radius / cell_size).astype(int)
            near_first_seeker = close_to_other_object_placement(
                "agent", n_hiders, "seekers_together_radius")
            agent_placement_fn.extend(
                [uniform_placement] + [near_first_seeker] * (n_seekers - 1))
    elif scenario == 'quadrant':
        walls = WallScenarios(grid_size=grid_size, door_size=door_size,
                              scenario=scenario, friction=other_friction,
                              p_door_dropout=p_door_dropout)
        env.add_module(walls)
        box_placement_fn = quadrant_placement
        ramp_placement_fn = uniform_placement
        if quadrant_game_hider_uniform_placement:
            hider_placement = uniform_placement
        else:
            hider_placement = quadrant_placement
        agent_placement_fn = [hider_placement] * n_hiders \
            + [outside_quadrant_placement] * n_seekers
    else:
        raise ValueError(f"Scenario {scenario} not supported.")

    # ------------------------------------------------------------------
    # World modules.
    # ------------------------------------------------------------------
    hider_color = np.array((66., 235., 244., 255.)) / 255
    seeker_color = (1., 0., 0., 1.)
    env.add_module(Agents(n_agents,
                          placement_fn=agent_placement_fn,
                          color=[hider_color] * n_hiders + [seeker_color] * n_seekers,
                          friction=other_friction,
                          polar_obs=polar_obs))

    max_boxes = np.max(n_boxes)
    has_boxes = max_boxes > 0
    has_ramps = n_ramps > 0

    if has_boxes:
        env.add_module(Boxes(n_boxes=n_boxes, placement_fn=box_placement_fn,
                             friction=box_floor_friction, polar_obs=polar_obs,
                             n_elongated_boxes=n_elongated_boxes,
                             boxid_obs=boxid_obs, box_only_z_rot=box_only_z_rot))
    if has_ramps:
        env.add_module(Ramps(n_ramps=n_ramps, placement_fn=ramp_placement_fn,
                             friction=other_friction, polar_obs=polar_obs,
                             pad_ramp_size=(np.max(n_elongated_boxes) > 0)))
    if n_lidar_per_agent > 0 and visualize_lidar:
        env.add_module(LidarSites(n_agents=n_agents, n_lidar_per_agent=n_lidar_per_agent))

    if n_food > 0:
        if scenario == 'quadrant':
            first_food_placement = quadrant_placement
        elif food_box_centered:
            first_food_placement = uniform_placement_middle(0.25)
        else:
            first_food_placement = uniform_placement

        if food_together_radius is None:
            food_placement = first_food_placement
        else:
            cell_size = floor_size / grid_size
            env.metadata['food_together_radius'] = \
                np.ceil(food_together_radius / cell_size).astype(int)

            assert n_food % n_food_cluster == 0
            per_cluster = n_food // n_food_cluster
            # Each food item is anchored on the first item of its cluster ...
            anchors = np.repeat(np.arange(0, n_food, per_cluster), per_cluster)
            food_placement = [
                close_to_other_object_placement("food", anchor, "food_together_radius")
                for anchor in anchors
            ]
            # ... and the anchor items themselves use the base placement.
            food_placement[::per_cluster] = [first_food_placement] * n_food_cluster
        env.add_module(Food(n_food, placement_fn=food_placement))

    env.add_module(AgentManipulation())
    if box_floor_friction is not None:
        env.add_module(FloorAttributes(friction=box_floor_friction))
    env.add_module(WorldConstants(gravity=gravity))
    env.reset()

    # ------------------------------------------------------------------
    # Observation key bookkeeping; extended as wrappers are added below.
    # ------------------------------------------------------------------
    keys_self = ['agent_qpos_qvel', 'hider', 'prep_obs']
    keys_mask_self = ['mask_aa_obs']
    keys_external = ['agent_qpos_qvel']
    keys_copy = ['you_lock', 'team_lock', 'ramp_you_lock', 'ramp_team_lock']
    keys_mask_external = []

    # ------------------------------------------------------------------
    # Wrappers.
    # ------------------------------------------------------------------
    env = SplitMultiAgentActions(env)
    if team_size_obs:
        keys_self.append('team_size')
    # Team index: 0 for every hider, 1 for every seeker.
    team_index = np.append(np.zeros((n_hiders,)), np.ones((n_seekers,)))
    env = TeamMembership(env, team_index)
    env = AgentAgentObsMask2D(env)
    hider_obs = np.array([[1]] * n_hiders + [[0]] * n_seekers)
    env = AddConstantObservationsWrapper(env, new_obs={'hider': hider_obs})
    env = HideAndSeekRewardWrapper(env, n_hiders=n_hiders, n_seekers=n_seekers,
                                   rew_type=rew_type)
    if restrict_rect is not None:
        env = RestrictAgentsRect(env, restrict_rect=restrict_rect,
                                 penalize_objects_out=penalize_objects_out)
    env = PreparationPhase(env, prep_fraction=prep_fraction)
    env = DiscretizeActionWrapper(env, 'action_movement')

    if has_boxes:
        env = AgentGeomObsMask2D(env, pos_obs_key='box_pos', mask_obs_key='mask_ab_obs',
                                 geom_idxs_obs_key='box_geom_idxs')
        keys_external.extend(['mask_ab_obs', 'box_obs'])
        keys_mask_external.append('mask_ab_obs')

    if n_food:
        if food_respawn_time is None:
            respawn_time = np.inf
        else:
            respawn_time = food_respawn_time
        if food_radius is None:
            eat_thresh = np.inf
        else:
            eat_thresh = food_radius

        env = AgentSiteObsMask2D(env, pos_obs_key='food_pos', mask_obs_key='mask_af_obs')
        env = FoodHealthWrapper(env, respawn_time=respawn_time,
                                eat_thresh=eat_thresh,
                                max_food_health=max_food_health, food_rew_type=food_rew_type,
                                reward_scale=food_reward_scale)
        # Eating is gated on the food visibility mask (can only eat if in vision).
        env = MaskActionWrapper(env, 'action_eat_food', ['mask_af_obs'])
        if prep_obs:
            env = MaskPrepPhaseAction(env, 'action_eat_food')
        if not eat_when_caught:
            env = MaskUnseenAction(env, 0, 'action_eat_food')
        # Only hider indices are passed as allowed eaters.
        env = AlwaysEatWrapper(env, agent_idx_allowed=np.arange(n_hiders))
        keys_external.extend(['mask_af_obs', 'food_obs'])
        keys_mask_external.append('mask_af_obs')

    if lock_box and has_boxes:
        box_lock_mask_keys = None if lock_out_of_vision else ["mask_ab_obs"]
        env = LockObjWrapper(env,
                             body_names=[f'moveable_box{i}' for i in range(max_boxes)],
                             agent_idx_allowed_to_lock=np.arange(n_agents),
                             lock_type=lock_type, radius_multiplier=lock_radius_multiplier,
                             obj_in_game_metadata_keys=["curr_n_boxes"],
                             agent_allowed_to_lock_keys=box_lock_mask_keys)

    if has_ramps:
        env = AgentGeomObsMask2D(env, pos_obs_key='ramp_pos', mask_obs_key='mask_ar_obs',
                                 geom_idxs_obs_key='ramp_geom_idxs')
        if lock_ramp:
            ramp_lock_mask_keys = None if lock_out_of_vision else ["mask_ar_obs"]
            env = LockObjWrapper(env,
                                 body_names=[f'ramp{i}:ramp' for i in range(n_ramps)],
                                 agent_idx_allowed_to_lock=np.arange(n_agents),
                                 lock_type=lock_type, ac_obs_prefix='ramp_',
                                 radius_multiplier=lock_radius_multiplier,
                                 obj_in_game_metadata_keys=['curr_n_ramps'],
                                 agent_allowed_to_lock_keys=ramp_lock_mask_keys)
        keys_external.extend(['ramp_obs'])
        keys_mask_external.append('mask_ar_obs')

    if grab_box and (has_boxes or has_ramps):
        grabbable_boxes = [f'moveable_box{i}' for i in range(max_boxes)]
        grabbable_ramps = [f"ramp{i}:ramp" for i in range(n_ramps)]
        env = GrabObjWrapper(env, grabbable_boxes + grabbable_ramps,
                             radius_multiplier=grab_radius_multiplier,
                             grab_exclusive=grab_exclusive,
                             obj_in_game_metadata_keys=['curr_n_boxes', 'curr_n_ramps'])

    if n_lidar_per_agent > 0:
        env = Lidar(env, n_lidar_per_agent=n_lidar_per_agent, visualize_lidar=visualize_lidar,
                    compress_lidar_scale=compress_lidar_scale)
        keys_copy.extend(['lidar'])
        keys_external.extend(['lidar'])

    if prep_obs:
        env = TrackStatWrapper(env, max_boxes, n_ramps, n_food)
    env = SplitObservations(env, keys_self + keys_mask_self,
                            keys_copy=keys_copy, keys_self_matrices=keys_mask_self)

    env = SpoofEntityWrapper(env, max_boxes,
                             ['box_obs', 'you_lock', 'team_lock', 'obj_lock'],
                             ['mask_ab_obs'])
    if n_food:
        env = SpoofEntityWrapper(env, n_food, ['food_obs'], ['mask_af_obs'])
    keys_mask_external.extend(['mask_ab_obs_spoof', 'mask_af_obs_spoof'])
    if max_n_agents is not None:
        env = SpoofEntityWrapper(env, max_n_agents,
                                 ['agent_qpos_qvel', 'hider', 'prep_obs'],
                                 ['mask_aa_obs'])
    env = LockAllWrapper(env, remove_object_specific_lock=True)

    if grab_box and not grab_out_of_vision:
        pull_mask_keys = ['mask_ab_obs'] + (['mask_ar_obs'] if has_ramps else [])
        env = MaskActionWrapper(env, 'action_pull', pull_mask_keys)
    if grab_box and not grab_selective:
        env = GrabClosestWrapper(env)

    # Seeker indices are passed to NoActionsInPrepPhase.
    env = NoActionsInPrepPhase(env, np.arange(n_hiders, n_hiders + n_seekers))
    env = DiscardMujocoExceptionEpisodes(env)

    ramp_concat_keys = ['ramp_obs']
    if lock_ramp:
        ramp_concat_keys = ramp_concat_keys + ['ramp_you_lock', 'ramp_team_lock', 'ramp_obj_lock']
    concat_spec = {
        'agent_qpos_qvel': ['agent_qpos_qvel', 'hider', 'prep_obs'],
        'box_obs': ['box_obs', 'you_lock', 'team_lock', 'obj_lock'],
        'ramp_obs': ramp_concat_keys,
    }
    env = ConcatenateObsWrapper(env, concat_spec)
    env = SelectKeysWrapper(env, keys_self=keys_self,
                            keys_other=keys_external + keys_mask_self + keys_mask_external)
    return env