def __init__()

in Vehicle Routing Problem/src/vrp_environment.py
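
The names used below assume these module-level imports; the exact import path
for the space classes (gym vs. gymnasium) is an assumption based on the API in
use:

    from math import isclose

    import numpy as np
    from gym.spaces import Box, Dict, Discrete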


    def __init__(self, env_config=None):
        # Avoid a mutable default argument; env_config is mutated below to
        # record the effective configuration.
        env_config = {} if env_config is None else env_config

        self.vrp_view = None
        config_defaults = {'n_restaurants': 2,
                           'n_orders': 10,
                           'order_prob': 0.5,
                           'driver_capacity': 4,
                           'map_quad': (5, 5),
                           'order_promise': 60,
                           'order_timeout_prob': 0.15,
                           'episode_length': 1000,
                           'num_zones': 4,
                           'order_probs_per_zone': (0.1, 0.5, 0.3, 0.1),
                           'order_reward_min': (8, 5, 2, 1),
                           'order_reward_max': (12, 8, 5, 3),
                           'half_norm_scale_reward_per_zone': (0.5, 0.5, 0.5, 0.5),
                           'penalty_per_timestep': 0.1,
                           'penalty_per_move': 0.1,
                           'order_miss_penalty': 50}

        for key, val in config_defaults.items():
            val = env_config.get(key, val)  # override each default with the value from env_config, if present
            self.__dict__[key] = val  # expose the setting as an instance attribute, e.g. self.n_orders
            if key not in env_config:
                env_config[key] = val  # write the effective value back so env_config holds the full configuration

        assert len(self.order_probs_per_zone) == self.num_zones
        assert isclose(sum(self.order_probs_per_zone), 1.0)

        self.csv_file = '/opt/ml/output/data/vrp_rewards.csv'
        self.dr_used_capacity = 0
        self.o_x = []
        self.o_y = []
        self.o_status = []
        self.o_res_map = []
        self.o_time = []
        self.reward_per_order = []

        self.dr_x = None
        self.dr_y = None

        self.game_over = False
        self.state = []
        self.reward = None

        self.clock = 0

        # map boundaries
        self.map_min_x = -self.map_quad[0]
        self.map_max_x = +self.map_quad[0]
        self.map_min_y = -self.map_quad[1]
        self.map_max_y = +self.map_quad[1]
        self.map_range_x = range(self.map_min_x, self.map_max_x + 1)
        self.map_range_y = range(self.map_min_y, self.map_max_y + 1)

        # zone boundaries
        self.zone_range_x = np.array_split(np.array(self.map_range_x), self.num_zones)
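        # Illustration (not in the original): with the defaults map_quad=(5, 5)
        # and num_zones=4, map_range_x spans the 11 integers -5..5, so
        # np.array_split produces zones of sizes 3, 3, 3, 2:
        #   [-5, -4, -3], [-2, -1, 0], [1, 2, 3], [4, 5]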

        # restaurant x position limits
        res_x_min = [self.map_min_x] * self.n_restaurants
        res_x_max = [self.map_max_x] * self.n_restaurants
        # restaurant y position limits
        res_y_min = [self.map_min_y] * self.n_restaurants
        res_y_max = [self.map_max_y] * self.n_restaurants

        # driver x position limits
        dr_x_min = [self.map_min_x]
        dr_x_max = [self.map_max_x]
        # driver y position limits
        dr_y_min = [self.map_min_y]
        dr_y_max = [self.map_max_y]

        dr_used_capacity_min = [0]
        dr_used_capacity_max = [self.driver_capacity]

        # n_orders for x position limits
        o_x_min = [self.map_min_x] * self.n_orders
        o_x_max = [self.map_max_x] * self.n_orders
        # n_orders for y position limits
        o_y_min = [self.map_min_y] * self.n_orders
        o_y_max = [self.map_max_y] * self.n_orders

        # order status: 0 - inactive (not yet created, cancelled, or delivered), 1 - open, 2 - accepted, 3 - picked up
        o_status_min = [0] * self.n_orders
        o_status_max = [3] * self.n_orders

        # Reward per order
        reward_per_order_min = [0] * self.n_orders
        reward_per_order_max = [max(self.order_reward_max)] * self.n_orders

        # order-restaurant mapping, i.e. which restaurant each order belongs to (-1 if not assigned)
        o_res_map_min = [-1] * self.n_orders
        o_res_map_max = [self.n_restaurants - 1] * self.n_orders

        # time elapsed since the order has been placed
        o_time_min = [0] * self.n_orders
        o_time_max = [self.order_promise] * self.n_orders

        # Create the observation space
        orig_observation_space = Box(low=np.array(res_x_min +
                                                  res_y_min +
                                                  dr_x_min +
                                                  dr_y_min +
                                                  dr_used_capacity_min +
                                                  [self.driver_capacity] +  # total driver capacity (a constant feature)
                                                  o_x_min +
                                                  o_y_min +
                                                  o_status_min +
                                                  o_res_map_min +
                                                  o_time_min +
                                                  reward_per_order_min
                                                  ),
                                     high=np.array(res_x_max +
                                                   res_y_max +
                                                   dr_x_max +
                                                   dr_y_max +
                                                   dr_used_capacity_max +
                                                   [self.driver_capacity] +  # total driver capacity (a constant feature)
                                                   o_x_max +
                                                   o_y_max +
                                                   o_status_max +
                                                   o_res_map_max +
                                                   o_time_max +
                                                   reward_per_order_max
                                                   ),
                                     dtype=np.int16
                                     )
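        # Resulting flat observation length: 2 * n_restaurants + 4 + 6 * n_orders
        # (68 entries with the defaults above).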
        # Number of possible actions:
        # wait, accept order i, pick up order i, deliver order i, return to restaurant j
        self.max_avail_actions = 1 + 3 * self.n_orders + self.n_restaurants
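        # e.g., with the defaults: 1 + 3 * 10 + 2 = 33 actions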
        self.observation_space = Dict({
            # a mask of valid actions (e.g., [0, 0, 1, 0, 0, 1] when max_avail_actions is 6)
            "action_mask": Box(
                0,
                1,
                shape=(self.max_avail_actions,),
                dtype=np.float32),
            "real_obs": orig_observation_space
        }
        )
        self.action_space = Discrete(self.max_avail_actions)
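
A minimal usage sketch, assuming the enclosing class is named VRPEnvironment
(hypothetical; the actual class name is not shown in this excerpt):

    env = VRPEnvironment(env_config={'n_orders': 5, 'driver_capacity': 3})
    assert env.n_orders == 5                # override from env_config applied
    assert env.episode_length == 1000       # unspecified keys keep their defaults
    assert env.action_space.n == 1 + 3 * 5 + env.n_restaurants
    assert env.observation_space["action_mask"].shape == (env.max_avail_actions,)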