Vehicle Routing Problem/src/vrp_environment.py
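# Assumed module-level imports (not shown in this excerpt):
#   from math import isclose
#   import numpy as np
#   from gym.spaces import Box, Dict, Discrete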
def __init__(self, env_config=None):
    # Avoid a shared mutable default argument; the config dict is both read and written below.
    env_config = {} if env_config is None else env_config
self.vrp_view = None
config_defaults = {'n_restaurants': 2,
'n_orders': 10,
'order_prob': 0.5,
'driver_capacity': 4,
'map_quad': (5, 5),
'order_promise': 60,
'order_timeout_prob': 0.15,
'episode_length': 1000,
'num_zones': 4,
'order_probs_per_zone': (0.1, 0.5, 0.3, 0.1),
'order_reward_min': (8, 5, 2, 1),
'order_reward_max': (12, 8, 5, 3),
'half_norm_scale_reward_per_zone': (0.5, 0.5, 0.5, 0.5),
'penalty_per_timestep': 0.1,
'penalty_per_move': 0.1,
'order_miss_penalty': 50}
for key, val in config_defaults.items():
    val = env_config.get(key, val)  # Override defaults with constructor parameters
    setattr(self, key, val)
    # Write the effective value back so the caller sees the fully resolved config
    if key not in env_config:
        env_config[key] = val
assert len(self.order_probs_per_zone) == self.num_zones
assert isclose(sum(self.order_probs_per_zone), 1.0)
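# Reward log destination; /opt/ml/output/data is the Amazon SageMaker output data directory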
self.csv_file = '/opt/ml/output/data/vrp_rewards.csv'
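# Per-episode state (re-initialized each episode): order positions, status, ages, driver position and load, episode clock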
self.dr_used_capacity = 0
self.o_x = []
self.o_y = []
self.o_status = []
self.o_res_map = []
self.o_time = []
self.reward_per_order = []
self.dr_x = None
self.dr_y = None
self.game_over = False
self.state = []
self.reward = None
self.clock = 0
# map boundaries
self.map_min_x = -self.map_quad[0]
self.map_max_x = +self.map_quad[0]
self.map_min_y = -self.map_quad[1]
self.map_max_y = +self.map_quad[1]
self.map_range_x = range(self.map_min_x, self.map_max_x + 1)
self.map_range_y = range(self.map_min_y, self.map_max_y + 1)
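# e.g. the default map_quad=(5, 5) gives an 11 x 11 grid of integer cells centered at the origin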
# zone boundaries
self.zone_range_x = np.array_split(np.array(self.map_range_x), self.num_zones)
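# zones are vertical strips of the x-range; each zone has its own order probability and reward range
# (see order_probs_per_zone, order_reward_min, order_reward_max)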
# restaurant x position limits
res_x_min = [self.map_min_x] * self.n_restaurants
res_x_max = [self.map_max_x] * self.n_restaurants
# restaurant y position limits
res_y_min = [self.map_min_y] * self.n_restaurants
res_y_max = [self.map_max_y] * self.n_restaurants
# driver x position limits
dr_x_min = [self.map_min_x]
dr_x_max = [self.map_max_x]
# driver y position limits
dr_y_min = [self.map_min_y]
dr_y_max = [self.map_max_y]
dr_used_capacity_min = [0]
dr_used_capacity_max = [self.driver_capacity]
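# the total driver capacity is also exposed as a constant observation feature below (low == high)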
# order x position limits (one per order)
o_x_min = [self.map_min_x] * self.n_orders
o_x_max = [self.map_max_x] * self.n_orders
# order y position limits (one per order)
o_y_min = [self.map_min_y] * self.n_orders
o_y_max = [self.map_max_y] * self.n_orders
# order status: 0 - inactive (not created, cancelled, or delivered), 1 - open, 2 - accepted, 3 - picked up
o_status_min = [0] * self.n_orders
o_status_max = [3] * self.n_orders
# Reward per order
reward_per_order_min = [0] * self.n_orders
reward_per_order_max = [max(self.order_reward_max)] * self.n_orders
# order-restaurant mapping, i.e. which restaurant each order belongs to (-1 if unassigned)
o_res_map_min = [-1] * self.n_orders
o_res_map_max = [self.n_restaurants - 1] * self.n_orders
# time elapsed since the order has been placed
o_time_min = [0] * self.n_orders
o_time_max = [self.order_promise] * self.n_orders
# Create the observation space
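# Layout: [res_x, res_y, dr_x, dr_y, dr_used_capacity, driver_capacity,
#          o_x, o_y, o_status, o_res_map, o_time, reward_per_order]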
orig_observation_space = Box(low=np.array(res_x_min +
res_y_min +
dr_x_min +
dr_y_min +
dr_used_capacity_min +
[self.driver_capacity] +
o_x_min +
o_y_min +
o_status_min +
o_res_map_min +
o_time_min +
reward_per_order_min
),
high=np.array(res_x_max +
res_y_max +
dr_x_max +
dr_y_max +
dr_used_capacity_max +
[self.driver_capacity] +
o_x_max +
o_y_max +
o_status_max +
o_res_map_max +
o_time_max +
reward_per_order_max
),
dtype=np.int16
)
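# np.int16 suffices here: every feature is a small, bounded integer (coordinates, counts, timers, rewards)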
# number of possible actions
# wait, accept order i, pick up order i, deliver order i, return to restaurant j
self.max_avail_actions = 1 + 3 * self.n_orders + self.n_restaurants
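# The Dict below pairs the raw observation with an action mask, following the parametric-actions
# pattern: invalid actions can be masked out by the agent (e.g. by RLlib's action-masking models)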
self.observation_space = Dict({
# a mask of valid actions (e.g., [0, 0, 1, 0, 0, 1] for 6 max avail)
"action_mask": Box(
0,
1,
shape=(self.max_avail_actions,),
dtype=np.float32),
"real_obs": orig_observation_space
}
)
self.action_space = Discrete(self.max_avail_actions)
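# Minimal usage sketch (assumes this __init__ belongs to a gym.Env subclass, called VRPEnv
# here purely for illustration; the real class name may differ):
#   env = VRPEnv({'n_orders': 5, 'driver_capacity': 3})
#   assert env.action_space.n == 1 + 3 * env.n_orders + env.n_restaurants
#   obs = env.observation_space.sample()  # dict with 'action_mask' and 'real_obs'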