def __update_environment_parameters(self):

in Vehicle Routing Problem/src/vrp_environment.py [0:0]


    def __update_environment_parameters(self):
        """Advance order clocks, expire or time out stale orders, and spawn new ones.

        Runs once per environment step. Mutates the per-order state
        (``o_status``, ``o_time``, ``o_x``, ``o_y``, ``o_res_map``,
        ``reward_per_order``), the running ``reward``, the cumulative
        ``late_penalty``, the driver's ``dr_used_capacity``, and records
        the pre-penalty reward in ``info["RewardNoPenalty"]``.
        """
        # Age every active order (status >= 1) by one time step.
        for idx in range(self.n_orders):
            if self.o_status[idx] >= 1:
                self.o_time[idx] += 1

        # Snapshot the reward before any expiry penalties are applied.
        self.info["RewardNoPenalty"] = self.reward

        # Expire orders whose waiting time has reached the promised deadline.
        for idx in range(self.n_orders):
            if self.o_time[idx] < self.order_promise:
                continue
            status = self.o_status[idx]
            if status >= 2:
                # The driver had committed to this order: charge the miss
                # penalty and claw back the partial rewards already granted
                # (one third at accept, a further third at pickup).
                self.reward -= self.order_miss_penalty
                if status == 2:
                    self.reward -= self.reward_per_order[idx] / 3
                elif status == 3:
                    self.reward -= self.reward_per_order[idx] * 2 / 3
                self.late_penalty += self.order_miss_penalty
                if status == 3:
                    # A picked-up order was occupying a capacity slot; free it.
                    self.dr_used_capacity -= 1
            self.__reset_order(idx)

        # An open (not yet accepted) order may be taken by a competing driver.
        for idx in range(self.n_orders):
            if self.o_status[idx] == 1 and np.random.random(1)[0] < self.order_timeout_prob:
                self.__reset_order(idx)

        # Fill empty order slots with freshly generated orders.
        for idx in range(self.n_orders):
            if self.o_status[idx] != 0:
                continue
            # Flip a coin to decide whether a new order appears in this slot.
            if np.random.random(1)[0] >= self.order_prob:
                continue
            # Draw the order's zone, then materialize its details.
            zone = np.random.choice(self.num_zones, p=self.order_probs_per_zone)
            o_x, o_y, from_rest, order_reward = self.__receive_order(zone)
            self.o_status[idx] = 1
            self.o_time[idx] = 0
            self.o_res_map[idx] = from_rest
            self.o_x[idx] = o_x
            self.o_y[idx] = o_y
            self.reward_per_order[idx] = order_reward