def __update_driver_parameters()

in archived/rl_traveling_salesman_vehicle_routing_coach/src/VRP_abstract_env.py [0:0]


    def __update_driver_parameters(self, action):
        """Apply one driver action, then resolve pick-ups and deliveries.

        Action encoding as handled here:

        * ``0`` -- no-op.
        * ``1`` / ``2`` -- move up / down (``dr_y`` +1 / -1).
        * ``3`` / ``4`` -- move left / right (``dr_x`` -1 / +1).
        * ``> 4`` -- try to accept order ``action - 5``.

        A move is clamped only against the map edge it heads towards. An
        order is accepted only when its status is 1 (open) and the driver's
        used capacity is below ``driver_capacity``.

        Order status codes used in this method: 1 = open, 2 = accepted by
        the driver, 3 = picked up, 0 = slot cleared after delivery.

        The method returns nothing; it mutates the driver position, the
        per-order arrays, ``dr_used_capacity`` and ``reward`` in place.
        """
        # --- Step 1: movement or order acceptance ---------------------------
        # Action 0 (and any value matching no case below) changes nothing.
        if action > 4:
            order_idx = action - 5
            is_open = self.o_status[order_idx] == 1
            has_room = self.dr_used_capacity < self.driver_capacity
            if is_open and has_room:
                self.o_status[order_idx] = 2  # accepted, awaiting pick-up
                self.dr_used_capacity += 1
        elif action == 1:  # UP
            target = self.dr_y + 1
            self.dr_y = target if target < self.map_max_y else self.map_max_y
        elif action == 2:  # DOWN
            target = self.dr_y - 1
            self.dr_y = target if target > self.map_min_y else self.map_min_y
        elif action == 3:  # LEFT
            target = self.dr_x - 1
            self.dr_x = target if target > self.map_min_x else self.map_min_x
        elif action == 4:  # RIGHT
            target = self.dr_x + 1
            self.dr_x = target if target < self.map_max_x else self.map_max_x

        # --- Step 2: pick-ups -----------------------------------------------
        # Collect every restaurant located on the driver's current cell, then
        # promote each accepted order that belongs to one of them.
        restaurants_here = [
            r
            for r in range(self.n_restaurants)
            if self.dr_x == self.res_x[r] and self.dr_y == self.res_y[r]
        ]
        if restaurants_here:
            for order_idx in range(self.n_orders):
                if self.o_status[order_idx] != 2:
                    continue
                if any(self.o_res_map[order_idx] == r for r in restaurants_here):
                    self.o_status[order_idx] = 3  # picked up

        # --- Step 3: deliveries ---------------------------------------------
        for order_idx in range(self.n_orders):
            if self.o_status[order_idx] != 3:
                continue
            at_customer = (
                self.dr_x == self.o_x[order_idx] and self.dr_y == self.o_y[order_idx]
            )
            if not at_customer:
                continue

            elapsed = self.o_time[order_idx]
            # Bonus for delivering within the promised time ...
            if elapsed <= self.order_promise:
                self.reward += self.order_promise * 0.8
            # ... plus a term that shrinks (and turns negative past the
            # timeout) as the order ages. Kept as a separate increment so
            # the accumulation order of the reward is unchanged.
            self.reward += (self.order_timeout - elapsed) * 0.5

            # Free the capacity and clear the order slot.
            self.dr_used_capacity -= 1
            self.o_status[order_idx] = 0
            self.o_time[order_idx] = 0
            self.o_res_map[order_idx] = -1
            self.o_x[order_idx] = 0
            self.o_y[order_idx] = 0