easy_rec/python/protos/feature_config.proto (139 lines of code) (raw):

syntax = "proto2";
package protos;

import "easy_rec/python/protos/hyperparams.proto";
import "easy_rec/python/protos/dnn.proto";
import "easy_rec/python/protos/layer.proto";

// Selector used by FeatureGroupConfig.wide_deep.
enum WideOrDeep {
  DEEP = 0;
  WIDE = 1;
  WIDE_AND_DEEP = 2;
}

// Empty option message; chosen through SequenceCombiner.attention.
message AttentionCombiner {
}

// Empty option message; chosen through
// SequenceCombiner.multi_head_attention.
message MultiHeadAttentionCombiner {
}

// Combiner applied to a sequence feature; exactly one variant may be set.
message SequenceCombiner {
  oneof combiner {
    AttentionCombiner attention = 1;
    MultiHeadAttentionCombiner multi_head_attention = 2;
    // TextCNN is not declared in this file; it comes from one of the imports.
    TextCNN text_cnn = 3;
  }
}

// Embedding variable parameters (see FeatureConfig.ev_params).
message EVParams {
  optional uint64 filter_freq = 1 [default=0];
  optional uint64 steps_to_live = 2 [default=0];
  // use embedding cache, only for sok hybrid embedding
  optional bool use_cache = 3 [default=false];
  // for sok hybrid key value embedding
  optional uint64 init_capacity = 4 [default=8388608];
  optional uint64 max_capacity = 5 [default=16777216];
}

// Configuration of a single feature.
message FeatureConfig {
  enum FeatureType {
    IdFeature = 0;
    RawFeature = 1;
    TagFeature = 2;
    ComboFeature = 3;
    LookupFeature = 4;
    SequenceFeature = 5;
    ExprFeature = 6;
    PassThroughFeature = 7;
  }

  // Not referenced by any field visible in this file.
  // Value 3 is not assigned.
  enum FieldType {
    INT32 = 0;
    INT64 = 1;
    STRING = 2;
    FLOAT = 4;
    DOUBLE = 5;
    BOOL = 6;
  }

  optional string feature_name = 1;

  // input field names: must be included in DatasetConfig.input_fields
  repeated string input_names = 2;

  // NOTE(review): proto2 `required` is kept as-is for compatibility with
  // existing configs and generated code. The default only determines the
  // value returned while the field is unset; it does not satisfy the
  // required-field check.
  required FeatureType feature_type = 3 [default = IdFeature];
  optional string embedding_name = 4 [default = ''];
  optional uint32 embedding_dim = 5 [default = 0];

  optional uint64 hash_bucket_size = 6 [default = 0];
  // for categorical_column_with_identity
  optional uint64 num_buckets = 7 [default = 0];
  // only for raw features
  repeated double boundaries = 8;

  // separator within features
  optional string separator = 9 [default = '|'];

  // delimiter separating key from value
  optional string kv_separator = 10;

  // delimiter separating sequence multi-values
  optional string seq_multi_sep = 101;

  // truncate sequence data to max_seq_len
  optional uint32 max_seq_len = 102;

  optional string vocab_file = 11;
  repeated string vocab_list = 12;

  // other fields that share this config
  repeated string shared_names = 16;

  // lookup max select element number, default 10
  optional int32 lookup_max_sel_elem_num = 17 [default = 10];

  // max_partitions
  optional int32 max_partitions = 18 [default = 1];

  // combiner
  optional string combiner = 19 [default = 'sum'];

  // embedding initializer
  // (Initializer is not declared in this file; it comes from an import.)
  optional Initializer initializer = 20;

  // number of digits kept after the dot when formatting float/double to
  // string; scientific format is not used.
  // by default, converting float/double to string is not allowed
  optional int32 precision = 21 [default = -1];

  // normalize raw feature to [0-1]
  optional double min_val = 212 [default=0.0];
  optional double max_val = 213 [default=0.0];

  // normalization function for raw features:
  //   such as: tf.math.log1p
  optional string normalizer_fn = 214;

  // raw feature of multiple dimensions
  optional uint32 raw_input_dim = 24 [default=1];

  // sequence feature combiner
  optional SequenceCombiner sequence_combiner = 25;

  // sub feature type for sequence feature
  optional FeatureType sub_feature_type = 26 [default = IdFeature];

  // sequence length
  optional uint32 sequence_length = 27 [default = 1];

  // for expr feature
  optional string expression = 30;

  // embedding variable params
  optional EVParams ev_params = 31;

  // for combo feature:
  //   if not set, use cross_column
  //   otherwise, the input features are first joined
  //   and then passed to categorical_column
  optional string combo_join_sep = 401 [default = ''];

  // separator for each input
  //   if not set, combo inputs will not be split
  repeated string combo_input_seps = 402;
}

// Container holding a list of FeatureConfig entries plus one global switch.
message FeatureConfigV2 {
  repeated FeatureConfig features = 1;
  // force place embedding lookup ops on cpu to improve
  // training and inference efficiency.
  optional bool embedding_on_cpu = 2 [default=false];
}

// A named group of features.
message FeatureGroupConfig {
  optional string group_name = 1;
  repeated string feature_names = 2;

  optional WideOrDeep wide_deep = 3 [default = DEEP];
  repeated SeqAttGroupConfig sequence_features = 4;
  optional bool negative_sampler = 5 [default = false];
}

// One entry of SeqAttGroupConfig.seq_att_map.
// NOTE(review): the three lists presumably hold feature names; confirm
// against the code that consumes this message.
message SeqAttMap {
  repeated string key = 1;
  repeated string hist_seq = 2;
  repeated string aux_hist_seq = 3;
}

// Sequence group configuration referenced from
// FeatureGroupConfig.sequence_features.
message SeqAttGroupConfig {
  optional string group_name = 1;
  repeated SeqAttMap seq_att_map = 2;
  optional bool tf_summary = 3 [default = false];
  // DNN is not declared in this file; it comes from one of the imports.
  optional DNN seq_dnn = 4;
  optional bool allow_key_search = 5 [default = false];
  optional bool need_key_feature = 6 [default = true];
  optional bool allow_key_transform = 7 [default = false];
  optional bool transform_dnn = 8 [default = false];
}