in randomized_uncertain_social_preferences/rusp/wrappers_rusp.py [0:0]
def _precompute_observations(self, n_agents):
    '''
        Build every reward-matrix observation once and cache it in self.precomputed_obs
        (the original author notes these are static per episode).

        Each observation is stored under three keys:
            <key>               entries taken from the noise-free reward matrices
            <key>_noisy         the same entries taken after self.noise has been added
            <key>_noise_level   the same entries taken from self.noise_std

        Args:
            n_agents: number of copies of self.unnormalized_reward_xform_mat to stack, and the
                length of the index vector used to select entries from those copies.
    '''
    # Every agent gets its own noisy view of the reward matrix: stack n_agents copies along a new
    #   leading axis, then add the noise on top of the stack.
    # NOTE(review): assumes self.noise and self.noise_std are (n_agents, n_agents, n_agents) -- confirm
    single_mat = np.expand_dims(self.unnormalized_reward_xform_mat, 0)
    clean_stack = np.repeat(single_mat, n_agents, axis=0)
    noisy_stack = clean_stack + self.noise

    # Local alias of the cache; both names refer to the same dict object.
    obs = {}
    self.precomputed_obs = obs

    # Index vector 0..n_agents-1, shared by all the selections below.
    agent_idx = np.arange(n_agents)

    def _store_selection(key, *indices):
        '''
            Apply one set of indices to all 3 sources and store the 3 resulting observations.
        '''
        obs[key] = clean_stack[indices]  # Noise-free entries
        obs[f"{key}_noisy"] = noisy_stack[indices]  # Entries with noise added
        obs[f"{key}_noise_level"] = self.noise_std[indices]  # Noise level of each selected entry

    def _store_transposed(new_key, existing_key):
        '''
            Store the transpose of all 3 observations of an existing key under a new key. Handy when
            a policy or value function needs to see what the other agents observe about this agent.
        '''
        for suffix in ("", "_noisy", "_noise_level"):
            obs[new_key + suffix] = obs[existing_key + suffix].T

    # Weight I put on my own reward, seen through my own noise.
    #   This walks the 3D diagonal, so the result has shape (n_agents,)
    _store_selection('self_rew_value', agent_idx, agent_idx, agent_idx)

    # Weight each other agent puts on its own reward, seen through my own noise (s).
    #   Row i holds the diagonal of the ith matrix
    _store_selection('other_rew_value_s', slice(None), agent_idx, agent_idx)

    # My relationship variable with the other agents (so), seen through my noise (s).
    #   Row i holds row i of the ith matrix
    _store_selection('rew_share_so_s', agent_idx, agent_idx, slice(None))

    # Other agents' relationship variable with me (os), seen through their noise (o).
    #   Should only be used in the value function
    _store_transposed('rew_share_os_o', 'rew_share_so_s')