in Vehicle Routing Problem/src/vrp_environment.py [0:0]
def __orig_step(self, action):
    """Advance the environment by one timestep for a discrete ``action``.

    Action encoding, with ``n = self.n_orders`` and ``r = self.n_restaurants``:

    * ``0``                  -- wait
    * ``1 .. n``             -- accept order ``action - 1``
    * ``n + 1 .. 2n``        -- pick up order ``action - n - 1`` (head for the
      restaurant that ``self.o_res_map`` associates with that order)
    * ``2n + 1 .. 3n``       -- deliver order ``action - 2n - 1`` (head for
      ``(self.o_x[i], self.o_y[i])``)
    * ``3n + 1 .. 3n + r``   -- return to restaurant ``action - 3n - 1``

    Every step costs ``self.penalty_per_timestep``; the three movement action
    types additionally cost ``self.penalty_per_move``.

    Args:
        action: Integer index into the action space described above.

    Returns:
        A ``(state, reward, done, info)`` tuple. ``done`` becomes true once
        ``self.clock`` reaches ``self.episode_length``. ``info`` carries
        ``'no_late_penalty_reward'`` (``self.reward + self.late_penalty``).

    Raises:
        ValueError: If ``action`` lies outside ``0 .. 3n + r``. Negative
            actions are rejected as well; previously they were silently
            interpreted as "accept" with a negative order index.
    """
    # Reset the per-step bookkeeping before anything else touches it.
    self.info = {}
    self.reward = -self.penalty_per_timestep
    self.late_penalty = 0

    # Start point for any movement this step (presumably the driver's
    # current coordinates -- confirm against the rest of the class).
    origin = [self.dr_x, self.dr_y]

    n_orders = self.n_orders
    max_action = 3 * n_orders + self.n_restaurants
    # Without the lower bound a negative action satisfies
    # ``action <= n_orders`` and would be mistaken for an "accept".
    if not 0 <= action <= max_action:
        raise ValueError('Misaligned action space and step function for action {}'.format(action))

    action_type = None
    translated_action = None
    relevant_order_index = None
    destination = None  # stays None for actions that involve no movement

    if action == 0:  # Wait
        action_type = 'wait'
    elif action <= n_orders:  # Accept an order
        action_type = 'accept'
        relevant_order_index = action - 1
    elif action <= 2 * n_orders:  # Pick up an order at its restaurant
        action_type = 'pickup'
        relevant_order_index = action - n_orders - 1
        res_ordered_from = self.o_res_map[relevant_order_index]
        destination = [self.res_x[res_ordered_from], self.res_y[res_ordered_from]]
    elif action <= 3 * n_orders:  # Deliver the order
        action_type = 'deliver'
        relevant_order_index = action - 2 * n_orders - 1
        destination = [self.o_x[relevant_order_index], self.o_y[relevant_order_index]]
    else:  # Return to a restaurant (upper bound already validated above)
        action_type = 'return'
        destination_res = action - 3 * n_orders - 1
        destination = [self.res_x[destination_res], self.res_y[destination_res]]

    if destination is not None:
        # All movement actions share the same translation and move penalty.
        translated_action = vrp_action_go_from_a_to_b(origin, destination)
        self.reward -= self.penalty_per_move

    self.__update_driver_parameters(action_type, translated_action, relevant_order_index)
    self.__update_environment_parameters()
    # The state is built before the clock is advanced, as in the original.
    state = self.__create_state()

    # Update the clock
    self.clock += 1
    done = bool(self.clock >= self.episode_length)

    # NOTE(review): late_penalty is reset to 0 above and is presumably set by
    # the update helpers; this adds it back onto the step reward -- confirm.
    self.info['no_late_penalty_reward'] = self.reward + self.late_penalty
    return state, self.reward, done, self.info