in randomized_uncertain_social_preferences/rusp/env_prisoners_buddy.py [0:0]
def make_env(n_agents=5, horizon=50, horizon_lower=None, horizon_upper=None,
             prob_per_step_to_stop=0.02,
             choosing_period=5,
             mutual_cooperate_rew=2, defected_against_rew=-2, successful_defect_rew=1,
             agent_identity_dim=16,
             rusp_args=None):
    """Build the Prisoner's Buddy environment as a stack of wrappers.

    Starting from ``AbstractBaseEnv(n_agents)``, the environment is wrapped, in
    order, by ``RandomizedHorizonWrapper``, ``RandomIdentityVector``,
    ``PrisonersBuddy``, ``ActionOptionsWrapper``, ``RUSPWrapper``,
    ``SplitObservations``, ``ConcatenateObsWrapper`` and ``SelectKeysWrapper``.

    Args:
        n_agents: Passed to ``AbstractBaseEnv``.
        horizon: Used as the horizon limit whenever ``horizon_lower`` or
            ``horizon_upper`` is not supplied.
        horizon_lower: ``lower_lim`` for ``RandomizedHorizonWrapper``. A falsy
            value (``None`` or ``0``) falls back to ``horizon``.
        horizon_upper: ``upper_lim`` for ``RandomizedHorizonWrapper``. A falsy
            value (``None`` or ``0``) falls back to ``horizon``.
        prob_per_step_to_stop: Forwarded to ``RandomizedHorizonWrapper``.
        choosing_period: Forwarded to ``PrisonersBuddy``.
        mutual_cooperate_rew: Forwarded to ``PrisonersBuddy``.
        defected_against_rew: Forwarded to ``PrisonersBuddy``.
        successful_defect_rew: Forwarded to ``PrisonersBuddy``.
        agent_identity_dim: Forwarded to ``RandomIdentityVector`` (as
            ``vector_dim``) and to ``PrisonersBuddy``.
        rusp_args: Optional dict of keyword arguments forwarded to both
            ``RUSPWrapper`` and ``add_rew_share_observation_keys``. ``None``
            (the default) means no extra arguments.

    Returns:
        The fully wrapped environment (outermost layer: ``SelectKeysWrapper``).
    """
    # A None sentinel replaces the former ``{}`` default so that no dict object
    # is shared between calls.
    rusp_args = {} if rusp_args is None else rusp_args

    env = AbstractBaseEnv(n_agents)
    # ``or`` treats both None and 0 as "not given" and substitutes ``horizon``.
    env = RandomizedHorizonWrapper(env,
                                   lower_lim=horizon_lower or horizon,
                                   upper_lim=horizon_upper or horizon,
                                   prob_per_step_to_stop=prob_per_step_to_stop)
    env = RandomIdentityVector(env, vector_dim=agent_identity_dim)
    env = PrisonersBuddy(env, choosing_period=choosing_period,
                         agent_identity_dim=agent_identity_dim,
                         mutual_cooperate_rew=mutual_cooperate_rew,
                         defected_against_rew=defected_against_rew,
                         successful_defect_rew=successful_defect_rew)
    env = ActionOptionsWrapper(env, ['action_choose_agent'], {'action_choose_agent': -1})
    env = RUSPWrapper(env, **rusp_args)

    # Observation keys, grouped by how the later wrappers consume them.
    keys_self = ['previous_choice',
                 'next_choice_is_real',
                 'i_chose_any_rew',
                 'agent_identity',
                 'previous_choice_identity',
                 'timestep']
    keys_additional_self_vf = ['fraction_episode_done', 'horizon']
    keys_other_agents = [
        'previous_choice',
        'chose_me',
        'i_chose',
        'chose_me_rew',
        'i_chose_rew',
        'i_chose_any_rew',
        'agent_identity',
        'previous_choice_identity'
    ]
    keys_additional_other_agents_vf = []
    keys_self_matrices = ['chose_me',
                          'i_chose',
                          'chose_me_rew',
                          'i_chose_rew']
    keys_external = ['other_agents',
                     'other_agents_vf',
                     'additional_self_vf_obs']

    # NOTE(review): the return value is discarded, so this helper presumably
    # extends the key lists in place -- confirm against its definition. The
    # same list objects must therefore be the ones used by the wrappers below.
    add_rew_share_observation_keys(keys_self=keys_self,
                                   keys_additional_self_vf=keys_additional_self_vf,
                                   keys_other_agents=keys_other_agents,
                                   keys_additional_other_agents_vf=keys_additional_other_agents_vf,
                                   keys_self_matrices=keys_self_matrices,
                                   **rusp_args)

    env = SplitObservations(env, keys_self + keys_additional_self_vf,
                            keys_copy=[], keys_self_matrices=keys_self_matrices)
    # The '_self' suffix is appended here to the additional self value-function
    # keys; NOTE(review): presumably this matches names produced by
    # SplitObservations -- verify against that wrapper.
    env = ConcatenateObsWrapper(env, {
        'other_agents': keys_other_agents,
        'other_agents_vf': ['other_agents'] + keys_additional_other_agents_vf,
        'additional_self_vf_obs': [k + '_self' for k in keys_additional_self_vf],
    })
    env = SelectKeysWrapper(env, keys_self=keys_self,
                            keys_other=keys_external)
    return env