def decide_action()

in archived/rl_traveling_salesman_vehicle_routing_coach/src/VRP_baseline_utils.py [0:0]


def decide_action(prev_o_status, env, visit_stops):
    """Choose the driver's next action and update the planned list of stops.

    The decision is driven by comparing the previous order statuses with the
    current ones read from ``env``:

    * Statuses unchanged: keep following ``visit_stops``; when the driver is
      already at the first stop and more stops remain, the first stop is
      dropped and the driver heads for the next one.
    * Some order has status 1 (outstanding): accept the first such order by
      returning action ``5 + order_index``; the plan is left untouched.
    * Otherwise, if an order's status changed to 2 (accepted): the plan is
      recomputed with ``extract_state_for_dp(env)``.
    * Otherwise, if an order's status changed to 0 (delivered/expired): every
      stop located at that order's ``(o_x, o_y)`` is removed from the plan.

    Args:
        prev_o_status: Order statuses seen on the previous call; indexed in
            parallel with ``env.o_status``.
        env: Environment object exposing ``o_x``, ``o_y``, ``dr_x``, ``dr_y``
            and ``o_status`` (``o_status`` must support ``.index``).
        visit_stops: Ordered ``(x, y)`` stops still to be visited. The
            argument itself is never mutated.

    Returns:
        ``(action, visit_stops)`` where ``action`` stays at the default ``0``
        when there is nothing to accept and no stop to drive to
        (NOTE(review): presumably 0 is the environment's no-op -- confirm),
        and ``visit_stops`` is the possibly updated plan.
    """
    action = 0
    o_status = env.o_status
    driver_xy = (env.dr_x, env.dr_y)

    if prev_o_status == o_status:
        # Nothing has changed, move to the next stop if available
        if visit_stops:
            if driver_xy == visit_stops[0] and len(visit_stops) > 1:
                # Slice rather than pop so the caller's list is not mutated.
                visit_stops = visit_stops[1:]
            action = vrp_action_go_from_a_to_b(driver_xy, visit_stops[0])
        return action, visit_stops

    # Naively accept any outstanding order
    if 1 in o_status:
        return 5 + o_status.index(1), visit_stops

    # No order has status 1 from here on, so a changed status can only be a
    # delivered/expired order (0), a freshly accepted one (2), or some other
    # value that does not affect the plan.
    changed = [
        (idx, status)
        for idx, status in enumerate(o_status)
        if status != prev_o_status[idx]
    ]

    if any(status == 2 for _, status in changed):
        # Replan for accepted orders
        visit_stops = extract_state_for_dp(env)
    else:
        closed = [idx for idx, status in changed if status == 0]
        if closed:
            # Some orders delivered/expired, remove their stops
            order_xy = list(zip(env.o_x, env.o_y))
            closed_xy = [order_xy[idx] for idx in closed]
            visit_stops = [xy for xy in visit_stops if xy not in closed_xy]

    if visit_stops:
        action = vrp_action_go_from_a_to_b(driver_xy, visit_stops[0])

    return action, visit_stops