in Vehicle Routing Problem/src/vrp_environment.py [0:0]
def __update_environment_parameters(self):
    """Age the order slots, expire late orders, and generate new ones.

    Four passes are made over the ``self.n_orders`` order slots, in this
    order:

    1. Every slot whose status is >= 1 has its ``o_time`` increased by one.
    2. The current ``self.reward`` is stored in
       ``self.info["RewardNoPenalty"]``; then every slot whose ``o_time``
       has reached ``self.order_promise`` is reset. If that order's status
       is >= 2, ``self.order_miss_penalty`` plus a refund of the reward
       already granted (one third at status 2, two thirds at status 3) is
       subtracted from ``self.reward`` first, and at status 3 one unit of
       ``self.dr_used_capacity`` is released.
    3. Every status-1 order is reset with probability
       ``self.order_timeout_prob``.
    4. Every status-0 slot receives a new order with probability
       ``self.order_prob``; its fields come from ``self.__receive_order``
       for a zone sampled according to ``self.order_probs_per_zone``.

    The passes draw from ``np.random`` in slot order, so the statement
    order here matters for reproducibility under a fixed seed.
    """
    slots = range(self.n_orders)

    # Pass 1: active orders (status >= 1) have waited one more step.
    for idx in slots:
        is_active = self.o_status[idx] >= 1
        if is_active:
            self.o_time[idx] += 1

    # Pass 2: snapshot the reward before any late penalty is applied,
    # then handle orders that have reached the promise time.
    self.info["RewardNoPenalty"] = self.reward
    for idx in slots:
        expired = self.o_time[idx] >= self.order_promise
        if not expired:
            continue

        status = self.o_status[idx]
        if status >= 2:
            # The order had been accepted: charge the miss penalty and
            # take back what was granted at accept (1/3) and, if it was
            # also picked up, at pickup (2/3 in total).
            order_value = self.reward_per_order[idx]
            accept_refund = order_value * (status == 2) / 3
            pickup_refund = order_value * (status == 3) * 2 / 3
            # Subtract strictly left to right to keep float results stable.
            self.reward = (
                self.reward
                - self.order_miss_penalty
                - accept_refund
                - pickup_refund
            )
            self.late_penalty += self.order_miss_penalty
            if status == 3:
                # A status-3 order occupied capacity; free it.
                self.dr_used_capacity -= 1

        self.__reset_order(idx)

    # Pass 3: an open (status 1) order may be taken by some other driver.
    for idx in slots:
        if self.o_status[idx] != 1:
            continue
        if np.random.random(1)[0] < self.order_timeout_prob:
            self.__reset_order(idx)

    # Pass 4: flip a coin for each empty slot to decide whether a new
    # order appears there. The draw only happens for empty slots.
    for idx in slots:
        slot_is_free = self.o_status[idx] == 0
        if slot_is_free and np.random.random(1)[0] < self.order_prob:
            zone = np.random.choice(
                self.num_zones, p=self.order_probs_per_zone
            )
            pos_x, pos_y, origin, payout = self.__receive_order(zone)

            self.o_x[idx] = pos_x
            self.o_y[idx] = pos_y
            self.o_res_map[idx] = origin
            self.reward_per_order[idx] = payout
            self.o_time[idx] = 0
            self.o_status[idx] = 1