def create_model()

in 10_mlops/model.py [0:0]


def create_model():
    """Build the wide-and-deep flight-delay classifier.

    Numeric features feed the deep (DNN) side directly; categorical
    features — carrier, origin/dest airports, and lat/lon location
    crosses — are embedded for the deep side and one-hot encoded for
    the wide (linear) side. Returns the compiled Keras model produced
    by ``wide_and_deep_classifier``.
    """
    # Dense numeric features, one tf numeric_column per name.
    numeric_names = (
            'dep_delay,taxi_out,distance,dep_hour,is_weekday,' +
            'dep_airport_lat,dep_airport_lon,' +
            'arr_airport_lat,arr_airport_lon'
    ).split(',')
    real = {}
    for name in numeric_names:
        real[name] = tf.feature_column.numeric_column(name)

    # Categorical features: fixed carrier vocabulary, hashed airport codes.
    carriers = 'AS,VX,F9,UA,US,WN,HA,EV,MQ,DL,OO,B6,NK,AA'.split(',')
    sparse = {
        'carrier': tf.feature_column.categorical_column_with_vocabulary_list(
            'carrier', vocabulary_list=carriers),
        'origin': tf.feature_column.categorical_column_with_hash_bucket(
            'origin', hash_bucket_size=1000),
        'dest': tf.feature_column.categorical_column_with_hash_bucket(
            'dest', hash_bucket_size=1000),
    }

    # Keras Input layers keyed by feature name: floats for numeric
    # features, raw strings for categorical ones. Built before the
    # embedding columns are merged into `real` below, so only the true
    # numeric features get a float input.
    inputs = {}
    for name in real:
        inputs[name] = tf.keras.layers.Input(name=name, shape=(), dtype='float32')
    for name in sparse:
        inputs[name] = tf.keras.layers.Input(name=name, shape=(), dtype='string')

    # Discretize latitude/longitude over the continental-USA range.
    lat_boundaries = np.linspace(20.0, 50.0, NUM_BUCKETS).tolist()  # USA
    lon_boundaries = np.linspace(-120.0, -70.0, NUM_BUCKETS).tolist()  # USA
    disc = {}
    for key in ['dep_airport_lat', 'arr_airport_lat']:
        disc['d_{}'.format(key)] = tf.feature_column.bucketized_column(
            real[key], lat_boundaries)
    for key in ['dep_airport_lon', 'arr_airport_lon']:
        disc['d_{}'.format(key)] = tf.feature_column.bucketized_column(
            real[key], lon_boundaries)

    # Cross columns that make sense in combination: departure location,
    # arrival location, and the departure x arrival pair.
    sparse['dep_loc'] = tf.feature_column.crossed_column(
        [disc['d_dep_airport_lat'], disc['d_dep_airport_lon']],
        NUM_BUCKETS * NUM_BUCKETS)
    sparse['arr_loc'] = tf.feature_column.crossed_column(
        [disc['d_arr_airport_lat'], disc['d_arr_airport_lon']],
        NUM_BUCKETS * NUM_BUCKETS)
    sparse['dep_arr'] = tf.feature_column.crossed_column(
        [sparse['dep_loc'], sparse['arr_loc']], NUM_BUCKETS ** 4)

    # Embed every sparse column (crosses included) for the deep side.
    for colname, col in sparse.items():
        real['embed_{}'.format(colname)] = tf.feature_column.embedding_column(
            col, NUM_EMBEDS)

    # One-hot encode the sparse columns for the wide (linear) side.
    sparse = {
        colname: tf.feature_column.indicator_column(col)
        for colname, col in sparse.items()
    }

    model = wide_and_deep_classifier(
        inputs,
        linear_feature_columns=sparse.values(),
        dnn_feature_columns=real.values(),
        dnn_hidden_units=DNN_HIDDEN_UNITS)

    return model