# make_env()
#
# from randomized_uncertain_social_preferences/rusp/env_indirect_reciprocity.py

def make_env(n_agents=3,
             # Horizon
             horizon=20, horizon_lower=None, horizon_upper=None,
             prob_per_step_to_stop=0.05,
             # Matrix Payouts
             mutual_cooperate=2, defected_against=-2, successful_defect=4, mutual_defect=0,
             # Agent IDs
             agent_identity_dim=16,
             # Evals
             against_all_c=False, against_all_d=False, against_tft=False,
             last_step_first_agent_vs_last_agent=False, last_agent_always_plays=False,
             last_doesnt_play_until_t=None,
             last_must_play_at_t=False,
             # RUSP
             rusp_args={}):
    """Build the indirect-reciprocity environment: an n-agent iterated matrix
    game (prisoner's-dilemma-style payoffs) wrapped with randomized horizons,
    random agent identities, RUSP reward sharing, and observation
    splitting/selection for the policy.

    Args:
        n_agents: number of agents in the base environment.
        horizon: episode length; used for both horizon bounds when
            horizon_lower / horizon_upper are None.
        horizon_lower, horizon_upper: optional explicit bounds for the
            randomized horizon (fall back to `horizon` when falsy).
        prob_per_step_to_stop: per-step termination probability passed to
            RandomizedHorizonWrapper.
        mutual_cooperate, defected_against, successful_defect, mutual_defect:
            scalar payoffs used to assemble the 2x2x2 payoff matrix below.
        agent_identity_dim: dimensionality of each agent's random identity
            vector observation.
        against_all_c, against_all_d, against_tft: eval flags; exactly one
            should be set to pit agents against a scripted policy
            ('allc' / 'alld' / 'tft'). Checked in priority order, so if
            several are set, all-C wins, then all-D, then TFT.
        last_step_first_agent_vs_last_agent, last_agent_always_plays,
        last_doesnt_play_until_t, last_must_play_at_t: matchmaking options
            forwarded to ChooseAgentsToPlay (semantics defined by that
            wrapper; not visible here).
        rusp_args: kwargs forwarded to both RUSPWrapper and
            add_rew_share_observation_keys.
            NOTE(review): mutable default argument; it is only read here
            (unpacked via **), but `rusp_args=None` + `rusp_args or {}`
            would be the safer convention.

    Returns:
        The fully wrapped environment.
    """
    env = AbstractBaseEnv(n_agents)

    # Falsy lower/upper bounds collapse to the fixed `horizon` value.
    env = RandomizedHorizonWrapper(env, lower_lim=horizon_lower or horizon, upper_lim=horizon_upper or horizon,
                                   prob_per_step_to_stop=prob_per_step_to_stop)

    # Give each agent a random identity vector so others can recognize it.
    env = RandomIdentityVector(env, vector_dim=agent_identity_dim)

    # Decide which pair of agents plays each step (eval-time matchmaking knobs).
    env = ChooseAgentsToPlay(env, last_step_first_agent_vs_last_agent=last_step_first_agent_vs_last_agent,
                             last_agent_always_plays=last_agent_always_plays,
                             last_doesnt_play_until_t=last_doesnt_play_until_t,
                             last_must_play_at_t=last_must_play_at_t)

    # Construct Payoff Matrix
    # payoff_matrix[a_action][b_action] -> [a_reward, b_reward];
    # action 0 = cooperate, action 1 = defect (standard PD layout).
    cc = [mutual_cooperate, mutual_cooperate]
    cd = [defected_against, successful_defect]
    dc = list(reversed(cd))  # defect-vs-cooperate mirrors cooperate-vs-defect
    dd = [mutual_defect, mutual_defect]
    payoff_matrix = np.array([[cc, cd],
                              [dc, dd]])
    env = MultiPlayerIteratedMatrixGame(env, payoff_matrix=payoff_matrix)

    # Randomized Uncertain Social Preferences: reward-sharing wrapper.
    env = RUSPWrapper(env, **rusp_args)

    # Observation keys each agent sees about itself (policy inputs).
    keys_self = ['prev_ac',
                 'you_played',
                 'youre_playing',
                 'agent_identity',
                 'timestep']
    # Extra self keys fed only to the value function (privileged info).
    keys_additional_self_vf = ['fraction_episode_done', 'horizon']

    # Per-other-agent observation keys (policy inputs).
    keys_other_agents = ['prev_ac', 'youre_playing', 'agent_identity']

    keys_additional_other_agents_vf = []
    keys_self_matrices = []
    # Mutates the key lists in place to add RUSP reward-sharing observations.
    add_rew_share_observation_keys(keys_self=keys_self,
                                   keys_additional_self_vf=keys_additional_self_vf,
                                   keys_other_agents=keys_other_agents,
                                   keys_additional_other_agents_vf=keys_additional_other_agents_vf,
                                   keys_self_matrices=keys_self_matrices,
                                   **rusp_args)
    keys_external = ['other_agents',
                     'other_agents_vf',
                     'additional_self_vf_obs']

    keys_copy = []

    # Split each agent's obs into self vs other-agent views.
    env = SplitObservations(env, keys_self + keys_additional_self_vf,
                            keys_copy=keys_copy, keys_self_matrices=keys_self_matrices)

    # Concatenate grouped keys into single tensors per named bundle.
    env = ConcatenateObsWrapper(env, {'other_agents': keys_other_agents,
                                      'other_agents_vf': ['other_agents'] + keys_additional_other_agents_vf,
                                      'additional_self_vf_obs': [k + '_self' for k in keys_additional_self_vf]})

    env = SelectKeysWrapper(env, keys_self=keys_self,
                            keys_other=keys_external + keys_copy + ['youre_playing_self'])  # need to copy youre_playing_self through for the LastAgentScripted wrapper

    # Eval mode: pick the scripted opponent policy (checked in priority order).
    if against_all_c or against_all_d or against_tft:
        if against_all_c:
            policy_to_play = 'allc'
        elif against_all_d:
            policy_to_play = 'alld'
        elif against_tft:
            policy_to_play = 'tft'