def step()

in News Vendor/src/news_vendor_environment.py [0:0]


    def step(self, action):
        """Advance the environment by one period and return the transition.

        The requested order ``action[0]`` is bounded to lie between zero and
        ``max_level`` minus the summed inventory, a demand is drawn from a
        Poisson distribution, and the reward is sales revenue minus purchase
        cost, holding cost and lost-sale penalty.  The inventory entries of
        the state are then rolled forward according to ``self.l`` (which
        looks like the order lead time -- TODO confirm against the class).

        Args:
            action: Indexable whose first element is the requested order
                quantity.

        Returns:
            Tuple ``(new_state, reward, done, info)``.  ``done`` is True once
            ``step_count`` has reached ``max_steps``; ``info`` reports the
            demand draw, sales, underage and overage of this period.
        """
        (inventory, price, unit_cost, hold_cost,
         shortage_cost, demand_mean) = self.break_state()

        # Floor the order at zero and cap it so the summed inventory cannot
        # exceed max_level.
        spare_capacity = max(0, self.max_level - np.sum(inventory))
        order_qty = max(0, min(action[0], spare_capacity))

        demand = np.random.poisson(demand_mean)

        # Stock able to serve this period's demand; the fresh order counts
        # towards it only when self.l == 0.
        available = inventory[0]
        if self.l == 0:
            available += order_qty

        sold = min(available, demand)
        leftover = max(0, available - demand)
        shortfall = max(0, demand - available)

        # Reward terms; the purchase cost is scaled by gamma ** l.
        revenue = price * sold
        order_cost = self.gamma ** self.l * unit_cost * order_qty
        holding_cost = leftover * hold_cost
        shortage_penalty = shortage_cost * shortfall
        reward = revenue - order_cost - holding_cost - shortage_penalty

        # Bound the order once more, this time against the available stock,
        # before it enters the state.  The purchase cost above was computed
        # from the value prior to this bound.
        order_qty = max(0, min(self.max_level - available, order_qty))

        next_state = np.copy(self.state)
        lead = self.l
        if lead == 1:
            # The order joins the leftover stock directly.
            next_state[0] = leftover + order_qty
        elif lead > 1:
            # Shift the inventory entries one slot forward, place the new
            # order at index l - 1, and add the leftover to the front slot.
            last = self.inv_dim - 1
            next_state[:last] = self.state[1:self.inv_dim]
            next_state[lead - 1] = order_qty
            next_state[0] += leftover
        else:
            # The order was already counted in the available stock.
            next_state[0] = leftover

        self.step_count += 1
        done = bool(self.step_count >= self.max_steps)

        # Store a separate copy so the returned array does not alias
        # the environment's internal state.
        self.state = np.copy(next_state)
        info = {
            'demand realization': demand,
            'sales': sold,
            'underage': shortfall,
            'overage': leftover,
        }
        return next_state, reward, done, info