def _precompute_observations()

in randomized_uncertain_social_preferences/rusp/wrappers_rusp.py [0:0]


    def _precompute_observations(self, n_agents):
        '''
            Build the per-episode observation table and store it in self.precomputed_obs.

            The observations are static within an episode, so they are computed once here.
            Every base key is stored in three flavours:
                <key>               entries taken from the clean (noise-free) reward matrix
                <key>_noisy         the same entries taken from the reward matrix plus self.noise
                <key>_noise_level   the same entries taken from self.noise_std
            Args:
                n_agents: number of agents; the reward matrix is stacked this many times and
                    the diagonal selectors run over range(n_agents).
            Reads self.unnormalized_reward_xform_mat, self.noise and self.noise_std
                (assumes self.noise and self.noise_std are indexable as
                (n_agents, n_agents, n_agents) arrays -- TODO confirm against the caller).
        '''
        # Publish the (initially empty) table first, then fill it in place.
        obs = self.precomputed_obs = {}

        # Each agent gets its own independently perturbed copy of the reward matrix: stack the
        #   clean matrix n_agents times along a new leading axis, then add the noise on top.
        stacked = np.expand_dims(self.unnormalized_reward_xform_mat, 0)
        clean_mats = np.repeat(stacked, n_agents, axis=0)
        perturbed_mats = clean_mats + self.noise

        # (key suffix, array the entries are read from) -- order fixes the dict insertion order
        flavours = (
            ('', clean_mats),
            ("_noisy", perturbed_mats),
            ('_noise_level', self.noise_std),
        )

        agent_ids = np.arange(n_agents)
        whole_axis = slice(None)
        selections = (
            # Weight over my own reward as seen through my own noise. This is the 3D diagonal,
            #   so each entry has shape (n_agents,)
            ('self_rew_value', (agent_ids, agent_ids, agent_ids)),
            # Other agents' weight over their own reward as seen through my noise (s).
            #   Row i is the diagonal of the ith matrix
            ('other_rew_value_s', (whole_axis, agent_ids, agent_ids)),
            # My relationship variable with other agents (so) as seen through my noise (s).
            #   Row i is row i of the ith matrix
            ('rew_share_so_s', (agent_ids, agent_ids, whole_axis)),
        )

        for base_key, selector in selections:
            for suffix, source in flavours:
                obs[base_key + suffix] = source[selector]

        # Others' relationship variable with me (os) as seen through their noise (o): the
        #   transpose of every 'rew_share_so_s' flavour. Lets a policy or value function observe
        #   what other agents observe about it; should only be used in the value function.
        for suffix, _ in flavours:
            obs['rew_share_os_o' + suffix] = obs['rew_share_so_s' + suffix].T