# def make_env()
#
# in randomized_uncertain_social_preferences/rusp/env_oasis.py [0:0]


def make_env(n_substeps=15, n_agents=3,
             floor_size=(1.5, 6), action_lims=(-0.9, 0.9),
             grid_size=60, other_friction=0.01, box_floor_friction=0.2, gravity=(0, 0, -50),
             horizon=1000, horizon_lower=None, horizon_upper=None, prob_per_step_to_stop=0.001,
             # Food
             n_food=1, n_food_cluster=1, food_radius=0.4,
             food_respawn_time=0, max_food_health=5, food_together_radius=0.4,
             food_rew_type='selfish', food_reward_scale=0.0,
             # Health
             max_agent_health=20, health_per_food_bounds=(2.1, 2.7), health_per_step=-1.0,
             # Attacking
             attack_range=0.7, attack_damage=-5.0, only_attack_in_front=True,
             # Death
             life_rew=1, death_rew=-100, steps_freeze_on_death=100,
             # Random Teams
             rusp_args=None,
             # ID
             id_dim=16,
             # Action Masking
             mask_all_when_dead=True):
    """Build the "Oasis" multi-agent environment: a physics sim where agents
    eat clustered food, lose health over time, and may attack each other.

    The environment is assembled as a base simulation plus a stack of
    wrappers; several wrappers must be applied in a specific order (see
    inline comments below).

    Args:
        n_substeps: physics substeps per environment step (passed to Base).
        n_agents: number of agents in the scene.
        floor_size: floor dimensions passed to Base.
        action_lims: continuous action bounds passed to Base.
        grid_size: placement grid resolution passed to Base.
        other_friction: friction applied to agent geoms.
        box_floor_friction: floor friction; ``None`` skips the FloorAttributes module.
        gravity: world gravity vector.
        horizon / horizon_lower / horizon_upper / prob_per_step_to_stop:
            episode-length randomization; when the lower/upper bounds are
            ``None`` they fall back to ``horizon``.
        n_food: total food items; must be divisible by ``n_food_cluster``.
        n_food_cluster: number of spatial food clusters.
        food_radius: distance within which food can be eaten
            (``None`` disables eating via an infinite threshold).
        food_respawn_time: steps until eaten food respawns
            (``None`` disables respawn via an infinite time).
        max_food_health / food_together_radius / food_rew_type / food_reward_scale:
            food health, cluster tightness, and food-reward settings.
        max_agent_health / health_per_food_bounds / health_per_step:
            agent health dynamics (eating heals, time drains).
        attack_range / attack_damage / only_attack_in_front: attack action config.
        life_rew / death_rew / steps_freeze_on_death: survival reward shaping.
        rusp_args: kwargs forwarded to the RUSP observation/reward-sharing
            machinery; ``None`` means no extra args.
        id_dim: dimensionality of each agent's random identity vector.
        mask_all_when_dead: whether dead agents have all actions masked.

    Returns:
        The fully wrapped environment instance.

    Raises:
        ValueError: if ``n_food`` is not divisible by ``n_food_cluster``.
    """
    # Avoid the shared-mutable-default pitfall: fresh dict per call.
    rusp_args = {} if rusp_args is None else rusp_args

    env = Base(n_agents=n_agents,
               n_substeps=n_substeps,
               floor_size=floor_size,
               horizon=99999999999999,  # Just a big number so actual horizon is done by RandomizedHorizonWrapper
               action_lims=action_lims,
               deterministic_mode=False,
               grid_size=grid_size)
    if box_floor_friction is not None:
        env.add_module(FloorAttributes(friction=box_floor_friction))
    env.add_module(WorldConstants(gravity=gravity))

    env.add_module(Agents(n_agents,
                          placement_fn=uniform_placement,
                          friction=other_friction))

    # Food
    env.metadata['food_together_radius'] = food_together_radius

    # Explicit validation (an assert would vanish under `python -O`).
    if n_food % n_food_cluster != 0:
        raise ValueError(
            f"n_food ({n_food}) must be divisible by n_food_cluster ({n_food_cluster})")

    # Each cluster's first item is placed uniformly at random; the remaining
    # items of that cluster are placed near it (within food_together_radius).
    cluster_assignments = np.repeat(np.arange(0, n_food, n_food // n_food_cluster), n_food // n_food_cluster)
    food_placement = [close_to_other_object_placement(
        "food", i, "food_together_radius") for i in cluster_assignments]
    food_placement[::n_food // n_food_cluster] = [uniform_placement] * n_food_cluster

    env.add_module(Food(n_food, placement_fn=food_placement))

    env.reset()

    # Observation keys every agent sees about itself.
    keys_self = [
        'agent_qpos_qvel',
        'agent_identity',
        'agent_health',
        'is_dead',
        'time_to_alive',
        'timestep'
    ]
    # Extra self keys visible only to the value function (privileged info).
    keys_additional_self_vf = ['fraction_episode_done', 'horizon']
    keys_copy = ['mask_is_dead']
    # Keys each agent observes about every other agent.
    keys_other_agents = [
        'agent_qpos_qvel',
        'agent_identity',
        'agent_health',
        'is_dead',
        'time_to_alive',
    ]
    keys_additional_other_agents_vf = []
    keys_self_matrices = []

    # RUSP machinery may append reward-sharing observation keys in place.
    add_rew_share_observation_keys(keys_self=keys_self,
                                   keys_additional_self_vf=keys_additional_self_vf,
                                   keys_other_agents=keys_other_agents,
                                   keys_additional_other_agents_vf=keys_additional_other_agents_vf,
                                   keys_self_matrices=keys_self_matrices,
                                   **rusp_args)

    keys_external = ['other_agents',
                     'other_agents_vf',
                     'additional_self_vf_obs']

    keys_self_masks = ['mask_aa_obs']

    env = SplitMultiAgentActions(env)
    env = DiscretizeActionWrapper(env, 'action_movement')
    env = AgentAgentObsMask2D(env)

    env = ZeroRews(env)

    env = RandomizedHorizonWrapper(env, lower_lim=horizon_lower or horizon, upper_lim=horizon_upper or horizon,
                                   prob_per_step_to_stop=prob_per_step_to_stop)

    # None disables respawn / eating by pushing the threshold to infinity.
    env = FoodHealthWrapper(env, respawn_time=(np.inf if food_respawn_time is None else food_respawn_time),
                            eat_thresh=(np.inf if food_radius is None else food_radius),
                            max_food_health=max_food_health, food_rew_type=food_rew_type,
                            reward_scale=food_reward_scale, split_eat_between_agents=True)
    keys_external += ['mask_af_obs', 'food_obs']
    keys_copy.append('close_enough_to_food')

    env = FoodIncreaseHealth(env, health_per_food_bounds=health_per_food_bounds)
    env = TimeDecreaseHealth(env, health_per_step=health_per_step)

    # Attack action should go before Food Health wrapper, since it masks eat action
    env = AttackAction(env, attack_damage=attack_damage, attack_range=attack_range,
                       only_attack_in_front=only_attack_in_front)
    env = ActionOptionsWrapper(env, ['action_attack_agent', 'action_eat_food'], {'action_attack_agent': -1, 'action_eat_food': 0})
    env = ColorAgentsByOption(env, 'action_choose_option', ['action_attack_agent', 'action_eat_food', 'do_nothing'])
    keys_self.append('previous_choice')
    keys_other_agents.append('previous_choice')
    keys_self_matrices.append('attacked_me')
    keys_self.append('n_attacked_me')
    keys_other_agents += ['attacked_me', 'n_attacked_me']

    env = AgentHealthWrapper(env, max_health=max_agent_health, death_rew=death_rew,
                             steps_freeze_on_death=steps_freeze_on_death, life_rew=life_rew)

    # This needs to come before options wrapper, so we can't group it above
    env = AlwaysEatWrapper(env, agent_idx_allowed=np.arange(n_agents))

    env = RUSPWrapper(env, **rusp_args)

    env = RandomIdentityVector(env, vector_dim=id_dim)

    env = SplitObservations(env, keys_self + keys_additional_self_vf,
                            keys_copy=keys_copy, keys_self_matrices=keys_self_matrices + keys_self_masks)
    env = ConcatenateObsWrapper(env, {'other_agents': keys_other_agents,
                                      'other_agents_vf': ['other_agents'] + keys_additional_other_agents_vf,
                                      'additional_self_vf_obs': [k + '_self' for k in keys_additional_self_vf]})
    env = DiscardMujocoExceptionEpisodes(env)
    env = SelectKeysWrapper(env, keys_self=keys_self,
                            keys_other=keys_external + keys_copy + keys_self_masks)
    env = OasisActionMasks(env, mask_all_when_dead=mask_all_when_dead)
    return env