in randomized_uncertain_social_preferences/rusp/env_prisoners_buddy.py [0:0]
def step(self, action):
    """Step the wrapped env and record each agent's buddy choice; on choosing
    rounds, resolve mutual choices into teams, update rewards, and accumulate
    team statistics (reported in `info` when the episode ends)."""
    obs, rew, done, info = self.env.step(action)
    self._chose_me = np.zeros((self.n_agents, self.n_agents), dtype=bool)
    targets = np.ones(self.n_agents, dtype=int) * -1
    for i in range(self.n_agents):
        target = self._get_target_actor(i, action)
        if len(target):
            targets[i] = target[0]
            self._chose_me[target[0], i] = 1
    self._previous_choice_identity = obs['agent_identity'][targets]
    self._previous_choice_identity[targets == -1] = 0

    # Reward rounds
    if self._t % self.choosing_period == 0:
        self._both_chose = self._chose_me * self._chose_me.T
        self._chose_me_rew = self._chose_me.copy()
        self._teams = np.argmax(self._both_chose, axis=1)  # Index of each agent's teammate
        self._teams[np.all(self._both_chose == 0, axis=1)] = -1  # Agents without a team get -1 instead of 0
        rew = self._prisoners_buddy_reward_update(rew)

        # Track stats
        self._n_times_not_chosen[np.sum(self._chose_me, 1) == 0] += 1
        # Since both_chose is symmetric, just take the indices of nonzero entries in the upper triangle
        current_team_indices = np.c_[np.nonzero(np.triu(self._both_chose))]
        current_team_tuples = list(map(tuple, current_team_indices))
        teams_done = [k for k in self._current_team_lengths.keys() if k not in current_team_tuples]
        for team_done in teams_done:
            self._team_lengths.append(self._current_team_lengths[team_done])
            del self._current_team_lengths[team_done]
        for current_team_tuple in current_team_tuples:
            self._current_team_lengths[current_team_tuple] += 1
        self._i_chose_any_rew_obs = np.any(self._chose_me_rew, 0)[:, None]

        if self._first_choice:
            self._first_choice = False
        else:
            all_teams_didnt_change = np.all(self._previous_teams == self._teams)
            max_number_of_teams_filled = np.sum(self._teams != -1) == ((self.n_agents // 2) * 2)
            self._perfect_game = self._perfect_game and all_teams_didnt_change and max_number_of_teams_filled
        self._previous_teams = self._teams

    self._t += 1

    if done:
        self._team_lengths += list(self._current_team_lengths.values())
        info['average_team_length'] = np.mean(self._team_lengths) if len(self._team_lengths) else 0
        info['n_times_team_changed'] = np.sum(self._n_times_team_changed)
        info['n_agents_on_team_per_step'] = np.mean(self._n_agents_on_team)
        info['number_decisions'] = self._t / self.choosing_period
        info['n_unique_not_chosen'] = np.sum(self._n_times_not_chosen > 0)
        info['n_successful_defections'] = self._n_successful_defections
        info['perfect_game'] = self._perfect_game

    return self.observation(obs), rew, done, info
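
# --- Illustration (not part of the environment file) ---
# A minimal standalone sketch of the mutual-choice team resolution used in the
# reward rounds above. The 4-agent `chose_me` matrix below is a made-up example
# for illustration only, not data from the environment.
import numpy as np

# chose_me[j, i] = True means agent i chose agent j this round.
chose_me = np.zeros((4, 4), dtype=bool)
chose_me[1, 0] = True  # agent 0 chose agent 1
chose_me[0, 1] = True  # agent 1 chose agent 0 (reciprocated)
chose_me[3, 2] = True  # agent 2 chose agent 3 (not reciprocated)

# Mutual choices only: both_chose[i, j] is True iff i and j chose each other.
both_chose = chose_me * chose_me.T

# Teammate index per agent; agents without a mutual choice get -1.
teams = np.argmax(both_chose, axis=1)
teams[np.all(both_chose == 0, axis=1)] = -1
print(teams)  # [ 1  0 -1 -1]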