def _step()

in gym/gym/envs/algorithmic/algorithmic_env.py [0:0]


    def _step(self, action):
        """Advance the environment by one timestep.

        ``action`` is unpacked into three components, in order: a movement
        that is forwarded to ``self._move``, a write flag, and a predicted
        symbol. The prediction is only examined when the write flag equals 1,
        in which case it is compared against ``self.target`` at the current
        write-head position and the write head advances by one.

        Reward for the step:
            *  1.0 -- a write whose prediction matches the target symbol.
            * -0.5 -- a write whose prediction does not match (ends the
              episode).
            * -1.0 -- ``self.time`` has exceeded ``self.time_limit``; this
              replaces whatever reward the write produced and ends the
              episode.
            *  0.0 -- otherwise (no write on this step).

        The episode also ends once the write head has moved past the last
        index of ``self.target``.

        Side effects: updates ``self.last_action``, ``self.time``,
        ``self.write_head_position`` (on writes), ``self.last_reward`` and
        ``self.episode_total_reward``, and calls ``self._move``.

        Returns:
            A 4-tuple ``(observation, reward, done, info)`` where
            ``observation`` comes from ``self._get_obs()`` and ``info`` is
            always an empty dict.

        Raises:
            AssertionError: if ``action`` is not contained in
                ``self.action_space`` or the write head position is negative.
        """
        assert self.action_space.contains(action)
        self.last_action = action
        head_move, write_flag, symbol = action

        self.time += 1
        assert 0 <= self.write_head_position

        step_reward = 0.0
        finished = False

        if write_flag == 1:
            try:
                hit = symbol == self.target[self.write_head_position]
            except IndexError:
                # Writing past the end of the target only happens when the
                # caller keeps stepping after the episode has finished;
                # treat it as an incorrect write rather than crashing.
                logger.warn(
                    "It looks like you're calling step() even though this "
                    "environment has already returned done=True. You should always call "
                    "reset() once you receive done=True. Any further steps are undefined "
                    "behaviour."
                )
                hit = False

            # Bail as soon as a wrong character is written to the tape
            step_reward = 1.0 if hit else -0.5
            finished = not hit

            self.write_head_position += 1
            if self.write_head_position >= len(self.target):
                # Every target position has been written: episode complete.
                finished = True

        self._move(head_move)

        # Running over the time budget overrides any reward earned above.
        if self.time > self.time_limit:
            step_reward = -1.0
            finished = True

        observation = self._get_obs()
        self.last_reward = step_reward
        self.episode_total_reward += step_reward
        return (observation, step_reward, finished, {})