in randomized_uncertain_social_preferences/rusp/env_ipd.py [0:0]
def make_env(horizon=10, horizon_lower=None, horizon_upper=None,
             prob_per_step_to_stop=0.1,  # If set then we play the infinite game
             mutual_cooperate=2, defected_against=-2, successful_defect=4, mutual_defect=0,
             # Evals
             against_all_c=False, against_all_d=False, against_tft=False,
             # Random Teams
             rusp_args=None):
    """Construct a 2-player Iterated Prisoner's Dilemma environment with RUSP wrappers.

    Args:
        horizon: episode length used for both bounds when no explicit lower/upper is given.
        horizon_lower, horizon_upper: optional bounds for a randomized horizon;
            each falls back to `horizon` when None (or 0 — `or` is used below).
        prob_per_step_to_stop: per-step termination probability for the
            infinite-game variant.
        mutual_cooperate, defected_against, successful_defect, mutual_defect:
            payoff values used to build the 2x2 payoff matrix.
        against_all_c, against_all_d, against_tft: if one is set, the last agent
            is replaced by a scripted policy (all-cooperate / all-defect /
            tit-for-tat) for evaluation.
        rusp_args: kwargs forwarded to RUSPWrapper and
            add_rew_share_observation_keys. Defaults to no extra args.

    Returns:
        The fully wrapped environment.
    """
    # NOTE: avoid a mutable default argument — a shared `{}` default would be
    # reused (and could be mutated) across calls.
    if rusp_args is None:
        rusp_args = {}

    env = AbstractBaseEnv(2)
    env = RandomizedHorizonWrapper(env, lower_lim=horizon_lower or horizon, upper_lim=horizon_upper or horizon,
                                   prob_per_step_to_stop=prob_per_step_to_stop)

    # Construct the 2x2 payoff matrix: rows/cols index each player's action
    # (0 = cooperate, 1 = defect); each cell holds [row-player, col-player] rewards.
    cc = [mutual_cooperate, mutual_cooperate]
    cd = [defected_against, successful_defect]
    dc = list(reversed(cd))  # defect-vs-cooperate is the mirror of cooperate-vs-defect
    dd = [mutual_defect, mutual_defect]
    payoff_matrix = np.array([[cc, cd],
                              [dc, dd]])

    env = IteratedMatrixGameWrapper(env, payoff_matrix=payoff_matrix)
    env = RUSPWrapper(env, **rusp_args)

    # Observation routing: self keys, value-function-only keys, and other-agent keys.
    keys_self = ['prev_ac', 'timestep']
    keys_additional_self_vf = ['fraction_episode_done', 'horizon']
    keys_other_agents = ['prev_ac']
    keys_additional_other_agents_vf = []
    keys_self_matrices = []
    # Extends the key lists in place with reward-sharing observation keys.
    add_rew_share_observation_keys(keys_self=keys_self,
                                   keys_additional_self_vf=keys_additional_self_vf,
                                   keys_other_agents=keys_other_agents,
                                   keys_additional_other_agents_vf=keys_additional_other_agents_vf,
                                   keys_self_matrices=keys_self_matrices,
                                   **rusp_args)
    keys_external = ['other_agents',
                     'other_agents_vf',
                     'additional_self_vf_obs']

    env = SplitObservations(env, keys_self + keys_additional_self_vf,
                            keys_copy=[], keys_self_matrices=keys_self_matrices)
    env = ConcatenateObsWrapper(env, {'other_agents': keys_other_agents,
                                      'other_agents_vf': ['other_agents'] + keys_additional_other_agents_vf,
                                      'additional_self_vf_obs': [k + '_self' for k in keys_additional_self_vf]})
    env = SelectKeysWrapper(env, keys_self=keys_self,
                            keys_other=keys_external)

    # Evaluation mode: replace the last agent with a scripted opponent.
    if against_all_c or against_all_d or against_tft:
        if against_all_c:
            policy_to_play = 'allc'
        elif against_all_d:
            policy_to_play = 'alld'
        elif against_tft:
            policy_to_play = 'tft'
        env = LastAgentScripted(env, policy_to_play)
    return env