randomized_uncertain_social_preferences/rusp/env_indirect_reciprocity.py [84:90]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
            get to observe the last action each agent took. Agents are rewarded according to
            payoff_matrix.
        Args:
            payoff_matrix (2x2x2 np.ndarray): the payoff matrix. We index into this matrix
                according to agent actions.

        Observations:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



randomized_uncertain_social_preferences/rusp/env_ipd.py [16:21]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
            get to observe the last action each agent took. Agents are rewarded according to
            payoff_matrix.
        Args:
            payoff_matrix (2x2x2 np.ndarray): the payoff matrix. We index into this matrix
                according to agent actions.
        Observations:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



