def step()

in reagent/gym/envs/pomdp/pocman.py [0:0]


    def step(self, action):
        """Advance the environment by one time step.

        Moves the agent, decays the power-pellet timer, moves the ghosts,
        resolves ghost collisions, then applies food/power pickups at the
        agent's new position.

        Returns:
            (observation, reward, done, info) where ``info`` exposes the
            full internal state under the key ``"state"``.
        """
        assert self.action_space.contains(action)
        assert self.done is False
        self.step_cnt += 1

        # Every step costs one point by default.
        reward = -1

        # Move the agent before anything else reacts to it.
        self.internal_state.agent_pos = self.next_pos(
            self.internal_state.agent_pos, action
        )

        # Tick down the power-pellet timer, then snapshot whether it is
        # still active for this step's collision resolution.
        if self.internal_state.power_duration > 0:
            self.internal_state.power_duration -= 1
        agent_in_power = self.internal_state.power_duration > 0

        # Collect indices of ghosts that touch the agent, checking both
        # before and after each ghost moves so pass-throughs also count.
        collided = set()
        for idx, ghost in enumerate(self.internal_state.ghosts):
            if ghost.pos == self.internal_state.agent_pos:
                collided.add(idx)
                continue
            ghost.move(self.internal_state.agent_pos, agent_in_power)
            if ghost.pos == self.internal_state.agent_pos:
                collided.add(idx)

        for idx in collided:
            if agent_in_power:
                # Powered agent eats the ghost; the ghost respawns.
                reward += 25
                self.internal_state.ghosts[idx].reset()
            else:
                # A ghost caught the agent: episode ends immediately.
                reward += -100
                self.done = True
                break

        if self.step_cnt > self.board["_max_step"]:
            self.done = True

        pos = self.internal_state.agent_pos
        if self._agent_at_food():
            reward += 10
            self.maze[pos.x, pos.y] = Element.CLEAR_WALK_WAY
            # Clearing the last food pellet finishes the episode.
            if self._food_left() == 0:
                self.done = True

        if self._agent_at_power():
            self.internal_state.power_duration = self.board["_power_duration"]
            self.maze[pos.x, pos.y] = Element.CLEAR_WALK_WAY
            reward += 10

        return self._make_ob(), reward, self.done, {"state": self.internal_state}