def __orig_step()

in Vehicle Routing Problem/src/vrp_environment.py [0:0]


    def __orig_step(self, action):
        """Decode one discrete action, apply it, and advance the clock by one step.

        Action encoding, with ``n = self.n_orders`` and ``r = self.n_restaurants``:

        * ``0``                  -- wait
        * ``1 .. n``             -- accept order ``action - 1``
        * ``n+1 .. 2n``          -- pick up order ``action - n - 1``; the target is
          the restaurant that ``self.o_res_map`` associates with that order
        * ``2n+1 .. 3n``         -- deliver order ``action - 2n - 1``; the target is
          the order's own ``(o_x, o_y)`` coordinates
        * ``3n+1 .. 3n+r``       -- return to restaurant ``action - 3n - 1``

        For the three movement actions, the driver position and the target are
        passed to ``vrp_action_go_from_a_to_b`` and ``self.penalty_per_move`` is
        subtracted from the step reward.

        Args:
            action: Integer action index in ``[0, 3n + r]``.

        Returns:
            A tuple ``(state, reward, done, info)``. ``done`` becomes ``True`` once
            ``self.clock`` reaches ``self.episode_length``; ``info`` contains the
            key ``'no_late_penalty_reward'``.

        Raises:
            Exception: If ``action`` lies outside ``[0, 3n + r]``.
        """
        done = False
        self.info = {}
        # Every step starts from the fixed per-timestep cost.
        self.reward = -self.penalty_per_timestep
        # Reset here and read back at the end of the step; presumably updated by
        # the parameter-update helpers called below -- TODO confirm.
        self.late_penalty = 0

        n_orders = self.n_orders
        driver_position = [self.dr_x, self.dr_y]

        action_type = None
        translated_action = None
        relevant_order_index = None
        target = None  # Destination [x, y]; set only for movement actions.

        # Without this guard a negative action would satisfy
        # ``action <= n_orders`` below and be decoded as 'accept' with a negative
        # order index, silently addressing an order from the end.
        if action < 0:
            raise Exception('Misaligned action space and step function for action {}'.format(action))

        if action == 0:  # Wait
            action_type = 'wait'
        elif action <= n_orders:  # Accept an order
            action_type = 'accept'
            relevant_order_index = action - 1
        elif action <= 2 * n_orders:  # Pick up an order at its restaurant
            action_type = 'pickup'
            relevant_order_index = action - n_orders - 1
            res_ordered_from = self.o_res_map[relevant_order_index]
            target = [self.res_x[res_ordered_from], self.res_y[res_ordered_from]]
        elif action <= 3 * n_orders:  # Deliver the order
            action_type = 'deliver'
            relevant_order_index = action - 2 * n_orders - 1
            target = [self.o_x[relevant_order_index], self.o_y[relevant_order_index]]
        elif action <= 3 * n_orders + self.n_restaurants:  # Return to a restaurant
            action_type = 'return'
            destination_res = action - 3 * n_orders - 1
            target = [self.res_x[destination_res], self.res_y[destination_res]]
        else:
            raise Exception('Misaligned action space and step function for action {}'.format(action))

        if target is not None:
            # All movement actions share the same translation and movement cost.
            translated_action = vrp_action_go_from_a_to_b(driver_position, target)
            self.reward -= self.penalty_per_move

        self.__update_driver_parameters(action_type, translated_action, relevant_order_index)
        self.__update_environment_parameters()
        # The state is built before the clock is incremented.
        state = self.__create_state()

        # Update the clock
        self.clock += 1
        if self.clock >= self.episode_length:
            done = True

        # Adds late_penalty back onto the reward; presumably the helpers above
        # subtract it from self.reward -- TODO confirm.
        self.info['no_late_penalty_reward'] = self.reward + self.late_penalty

        return state, self.reward, done, self.info