in randomized_uncertain_social_preferences/rusp/env_oasis.py [0:0]
def make_env(n_substeps=15, n_agents=3,
             floor_size=(1.5, 6), action_lims=(-0.9, 0.9),
             grid_size=60, other_friction=0.01, box_floor_friction=0.2, gravity=(0, 0, -50),
             horizon=1000, horizon_lower=None, horizon_upper=None, prob_per_step_to_stop=0.001,
             # Food
             n_food=1, n_food_cluster=1, food_radius=0.4,
             food_respawn_time=0, max_food_health=5, food_together_radius=0.4,
             food_rew_type='selfish', food_reward_scale=0.0,
             # Health
             max_agent_health=20, health_per_food_bounds=(2.1, 2.7), health_per_step=-1.0,
             # Attacking
             attack_range=0.7, attack_damage=-5.0, only_attack_in_front=True,
             # Death
             life_rew=1, death_rew=-100, steps_freeze_on_death=100,
             # Random Teams
             rusp_args=None,
             # ID
             id_dim=16,
             # Action Masking
             mask_all_when_dead=True):
    """Build the fully-wrapped Oasis multi-agent environment.

    Constructs a ``Base`` mujoco environment with ``n_agents`` agents and
    ``n_food`` food items (grouped into ``n_food_cluster`` spatial clusters),
    then applies the full action/observation wrapper stack used by the RUSP
    training code: food/health dynamics, attacking, death, randomized-horizon
    termination, randomized social preferences (``rusp_args``), identity
    vectors, observation splitting/concatenation and action masking.

    NOTE(review): the order in which wrappers are applied below is
    load-bearing in at least two places (see inline comments carried over
    from the original); do not reorder without checking those constraints.

    Args:
        n_substeps, floor_size, action_lims, grid_size, gravity,
        box_floor_friction, other_friction: physical world configuration
            forwarded to ``Base`` and its modules. ``box_floor_friction=None``
            skips the ``FloorAttributes`` module entirely.
        horizon, horizon_lower, horizon_upper, prob_per_step_to_stop:
            episode-length parameters for ``RandomizedHorizonWrapper``; the
            lower/upper limits fall back to ``horizon`` when not given.
        n_food, n_food_cluster, food_radius, food_respawn_time,
        max_food_health, food_together_radius, food_rew_type,
        food_reward_scale: food placement and ``FoodHealthWrapper`` settings.
            ``None`` for ``food_respawn_time``/``food_radius`` maps to
            ``np.inf`` (never respawn / never in eating range).
        max_agent_health, health_per_food_bounds, health_per_step,
        attack_range, attack_damage, only_attack_in_front,
        life_rew, death_rew, steps_freeze_on_death: health/attack/death
            wrapper settings.
        rusp_args: keyword arguments forwarded to ``RUSPWrapper`` and
            ``add_rew_share_observation_keys``. Defaults to an empty dict.
        id_dim: dimensionality of the random per-agent identity vector.
        mask_all_when_dead: forwarded to ``OasisActionMasks``.

    Returns:
        The fully wrapped environment.

    Raises:
        AssertionError: if ``n_food`` is not divisible by ``n_food_cluster``.
    """
    # None-sentinel instead of a mutable `{}` default so repeated calls can
    # never share (and accidentally mutate) a single dict instance.
    if rusp_args is None:
        rusp_args = {}

    env = Base(n_agents=n_agents,
               n_substeps=n_substeps,
               floor_size=floor_size,
               # Just a big number so the actual horizon is enforced by
               # RandomizedHorizonWrapper below.
               horizon=99999999999999,
               action_lims=action_lims,
               deterministic_mode=False,
               grid_size=grid_size)
    if box_floor_friction is not None:
        env.add_module(FloorAttributes(friction=box_floor_friction))
    env.add_module(WorldConstants(gravity=gravity))
    env.add_module(Agents(n_agents,
                          placement_fn=uniform_placement,
                          friction=other_friction))

    # --- Food placement -------------------------------------------------
    env.metadata['food_together_radius'] = food_together_radius
    assert n_food % n_food_cluster == 0, \
        f"n_food ({n_food}) must be divisible by n_food_cluster ({n_food_cluster})"
    cluster_size = n_food // n_food_cluster
    # Index of the anchor food item for each food item, e.g. n_food=6,
    # n_food_cluster=2 -> [0, 0, 0, 3, 3, 3].
    cluster_assignments = np.repeat(np.arange(0, n_food, cluster_size), cluster_size)
    # Each item is placed within food_together_radius of its cluster anchor…
    food_placement = [close_to_other_object_placement(
        "food", anchor, "food_together_radius") for anchor in cluster_assignments]
    # …except the anchors themselves, which are placed uniformly at random.
    food_placement[::cluster_size] = [uniform_placement] * n_food_cluster
    env.add_module(Food(n_food, placement_fn=food_placement))

    env.reset()

    # --- Observation key bookkeeping ------------------------------------
    # keys_self: per-agent observations of themselves (policy and VF).
    keys_self = [
        'agent_qpos_qvel',
        'agent_identity',
        'agent_health',
        'is_dead',
        'time_to_alive',
        'timestep'
    ]
    # Extra self observations visible only to the value function.
    keys_additional_self_vf = ['fraction_episode_done', 'horizon']
    keys_copy = ['mask_is_dead']
    # Observations each agent gets about every other agent.
    keys_other_agents = [
        'agent_qpos_qvel',
        'agent_identity',
        'agent_health',
        'is_dead',
        'time_to_alive',
    ]
    keys_additional_other_agents_vf = []
    keys_self_matrices = []

    # RUSP reward-sharing observations are appended in place to the lists above.
    add_rew_share_observation_keys(keys_self=keys_self,
                                   keys_additional_self_vf=keys_additional_self_vf,
                                   keys_other_agents=keys_other_agents,
                                   keys_additional_other_agents_vf=keys_additional_other_agents_vf,
                                   keys_self_matrices=keys_self_matrices,
                                   **rusp_args)
    keys_external = ['other_agents',
                     'other_agents_vf',
                     'additional_self_vf_obs']
    keys_self_masks = ['mask_aa_obs']

    # --- Wrapper stack ---------------------------------------------------
    env = SplitMultiAgentActions(env)
    env = DiscretizeActionWrapper(env, 'action_movement')
    env = AgentAgentObsMask2D(env)
    env = ZeroRews(env)
    env = RandomizedHorizonWrapper(env,
                                   lower_lim=horizon_lower or horizon,
                                   upper_lim=horizon_upper or horizon,
                                   prob_per_step_to_stop=prob_per_step_to_stop)

    # Food: None means "never" for both respawn time and eating range.
    env = FoodHealthWrapper(env,
                            respawn_time=(np.inf if food_respawn_time is None else food_respawn_time),
                            eat_thresh=(np.inf if food_radius is None else food_radius),
                            max_food_health=max_food_health,
                            food_rew_type=food_rew_type,
                            reward_scale=food_reward_scale,
                            split_eat_between_agents=True)
    keys_external += ['mask_af_obs', 'food_obs']
    keys_copy.append('close_enough_to_food')

    env = FoodIncreaseHealth(env, health_per_food_bounds=health_per_food_bounds)
    env = TimeDecreaseHealth(env, health_per_step=health_per_step)

    # Attack action should go before Food Health wrapper, since it masks the
    # eat action.
    env = AttackAction(env, attack_damage=attack_damage, attack_range=attack_range,
                       only_attack_in_front=only_attack_in_front)
    env = ActionOptionsWrapper(env,
                               ['action_attack_agent', 'action_eat_food'],
                               {'action_attack_agent': -1, 'action_eat_food': 0})
    env = ColorAgentsByOption(env, 'action_choose_option',
                              ['action_attack_agent', 'action_eat_food', 'do_nothing'])
    keys_self.append('previous_choice')
    keys_other_agents.append('previous_choice')
    keys_self_matrices.append('attacked_me')
    keys_self.append('n_attacked_me')
    keys_other_agents += ['attacked_me', 'n_attacked_me']

    env = AgentHealthWrapper(env, max_health=max_agent_health, death_rew=death_rew,
                             steps_freeze_on_death=steps_freeze_on_death, life_rew=life_rew)

    # This needs to come before the options wrapper, so we can't group it above.
    env = AlwaysEatWrapper(env, agent_idx_allowed=np.arange(n_agents))

    env = RUSPWrapper(env, **rusp_args)
    env = RandomIdentityVector(env, vector_dim=id_dim)
    env = SplitObservations(env, keys_self + keys_additional_self_vf,
                            keys_copy=keys_copy,
                            keys_self_matrices=keys_self_matrices + keys_self_masks)
    env = ConcatenateObsWrapper(env, {'other_agents': keys_other_agents,
                                      'other_agents_vf': ['other_agents'] + keys_additional_other_agents_vf,
                                      'additional_self_vf_obs': [k + '_self' for k in keys_additional_self_vf]})
    env = DiscardMujocoExceptionEpisodes(env)
    env = SelectKeysWrapper(env, keys_self=keys_self,
                            keys_other=keys_external + keys_copy + keys_self_masks)
    env = OasisActionMasks(env, mask_all_when_dead=mask_all_when_dead)
    return env