in nevergrad/functions/rl/envs.py [0:0]
def step(self, action_dict: tp.Dict[str, int]) -> base.StepReturn:
    """Advance the environment by one step using both players' actions.

    Parameters
    ----------
    action_dict: dict
        Mapping from agent id ("player_0" / "player_1") to the index of the
        chosen action in JamesBond.actions.

    Returns
    -------
    obs (dict): New observations for each ready agent.
    rewards (dict): Reward values for each ready agent. If the
        episode is just started, the value will be None.
    dones (dict): Done values for each ready agent. The special key
        "__all__" (required) is used to indicate env termination.
    infos (dict): Optional info values for each agent id.
    """
    # Decode the requested actions once; the verbose trace below reuses them
    # (it intentionally shows the *requested* action, before any correction).
    actions = [JamesBond.actions[action_dict[f"player_{k}"]] for k in range(2)]
    if self.verbose:
        strings = [
            f"Player {k} {self.players[k].get_state()}: {actions[k]}" for k in range(2)
        ]
        print(" - ".join(strings))
    self._step += 1
    info: tp.Dict[tp.Any, tp.Any] = {}
    rew = {"player_0": 0, "player_1": 0}
    # change impossible actions: firing without ammunition becomes a reload
    actions = [
        "reload" if a == "fire" and not p.ammunitions else a for p, a in zip(self.players, actions)
    ]
    # update players' internal state with the (corrected) actions
    for player, action in zip(self.players, actions):
        player.update_with_action(action)
    # main way to win: fire while the opponent is reloading
    if actions[0] == "fire" and actions[1] == "reload":
        rew = {"player_0": 1, "player_1": 0}
    elif actions[0] == "reload" and actions[1] == "fire":
        rew = {"player_0": 0, "player_1": 1}
    # lose if you keep protecting: whoever has the longer protect streak loses
    if any(p.consecutive_protect > JamesBond.max_consecutive_protect for p in self.players):
        if self.players[0].consecutive_protect > self.players[1].consecutive_protect:
            rew = {"player_0": 0, "player_1": 1}
        elif self.players[1].consecutive_protect > self.players[0].consecutive_protect:
            rew = {"player_0": 1, "player_1": 0}
        # if both keep protecting... well, it goes on...
    obs = self._make_observations()
    # episode ends at the 100-step cap or as soon as either player scores
    # (rew values are only ever 0 or 1 here, so any() suffices)
    done = {"__all__": self._step == 100 or any(rew.values())}
    return obs, rew, done, info