def step()

in qlearn/envs/nchain.py [0:0]


    def step(self, action):
        """Advance the chain by one step and report the transition.

        A truthy ``action`` moves the agent one cell forward and a falsy
        one moves it one cell backward; at either end of the chain the
        state is left where it is. The reward is decided by the state
        *before* the move: 1.0 when ``action == 1`` is taken in the last
        cell (``self.n - 1``), 0.001 when ``action == 0`` is taken in
        cell 0, and 0 in every other case.

        Returns:
            A 4-tuple ``(observation, reward, done, info)``.
            ``observation`` is a float32 array of length ``self.n`` that
            is 1.0 at every index up to and including the new state and
            0.0 beyond it. ``done`` is True once the incremented
            ``self.nsteps`` has reached ``self.max_nsteps``. ``info`` is
            always None.

        Raises:
            AssertionError: if ``self.action_space`` does not contain
                ``action``.
        """
        assert self.action_space.contains(action)
        cells = np.arange(self.n)
        last_cell = self.n - 1

        # The payoff looks at where the agent stands before it moves.
        if self.state == last_cell and action == 1:
            payoff = 1.0
        elif self.state == 0 and action == 0:
            payoff = 0.001
        else:
            payoff = 0

        # Move one cell, staying put when already at the matching end.
        forward = bool(action)
        if forward and self.state != last_cell:
            self.state += 1
        elif not forward and self.state != 0:
            self.state -= 1

        self.nsteps += 1

        observation = (cells <= self.state).astype('float32')
        finished = self.nsteps >= self.max_nsteps
        return observation, payoff, finished, None