// easy_rec/python/protos/feature_config.proto
syntax = "proto2";
package protos;
import "easy_rec/python/protos/hyperparams.proto";
import "easy_rec/python/protos/dnn.proto";
import "easy_rec/python/protos/layer.proto";
// Placement choices for a feature group. Referenced by
// FeatureGroupConfig.wide_deep, whose default is DEEP.
// NOTE(review): presumably selects the wide part, the deep part, or both
// parts of a wide-and-deep style model -- confirm against the model code.
enum WideOrDeep {
  DEEP = 0;
  WIDE = 1;
  WIDE_AND_DEEP = 2;
}
// Field-less message used as the `attention` alternative of
// SequenceCombiner.combiner. Selecting it carries no parameters.
message AttentionCombiner {}
// Field-less message used as the `multi_head_attention` alternative of
// SequenceCombiner.combiner. Selecting it carries no parameters.
message MultiHeadAttentionCombiner {}
// Selects the combiner applied to a sequence feature (see
// FeatureConfig.sequence_combiner). The alternatives live in a oneof, so at
// most one of them is set at any time.
message SequenceCombiner {
  oneof combiner {
    // Attention combiner; takes no parameters.
    AttentionCombiner attention = 1;
    // Multi-head attention combiner; takes no parameters.
    MultiHeadAttentionCombiner multi_head_attention = 2;
    // TextCNN is not declared in this file.
    // NOTE(review): presumably provided by one of the imported protos --
    // confirm which one.
    TextCNN text_cnn = 3;
  }
}
// Embedding variable parameters; attached to a feature through
// FeatureConfig.ev_params.
message EVParams {
  // Defaults to 0.
  // NOTE(review): presumably the minimum key frequency required before an
  // embedding is kept -- confirm against the embedding variable code.
  optional uint64 filter_freq = 1 [default = 0];
  // Defaults to 0.
  // NOTE(review): presumably the number of steps an unused key survives
  // before eviction -- confirm against the embedding variable code.
  optional uint64 steps_to_live = 2 [default = 0];
  // Use the embedding cache; only for sok hybrid embedding.
  optional bool use_cache = 3 [default = false];
  // For sok hybrid key value embedding. Default is 8388608 (2^23).
  optional uint64 init_capacity = 4 [default = 8388608];
  // Default is 16777216 (2^24).
  // NOTE(review): presumably also specific to sok hybrid key value
  // embedding, like init_capacity -- confirm.
  optional uint64 max_capacity = 5 [default = 16777216];
}
// Configuration of a single feature: which input fields it reads, how it is
// interpreted (feature_type) and how it is embedded or transformed.
//
// Field numbers are not contiguous and not in ascending order; they are part
// of the wire contract and must not be renumbered.
message FeatureConfig {
  // Supported kinds of features.
  enum FeatureType {
    IdFeature = 0;
    RawFeature = 1;
    TagFeature = 2;
    ComboFeature = 3;
    LookupFeature = 4;
    SequenceFeature = 5;
    ExprFeature = 6;
    PassThroughFeature = 7;
  }

  // Scalar field types. No field visible in this message uses this enum.
  // Value 3 is unassigned.
  // NOTE(review): presumably 3 was skipped or removed on purpose -- avoid
  // reusing it until that is confirmed.
  enum FieldType {
    INT32 = 0;
    INT64 = 1;
    STRING = 2;
    FLOAT = 4;
    DOUBLE = 5;
    BOOL = 6;
  }

  // Name of the feature.
  // NOTE(review): presumably derived from input_names when unset -- confirm.
  optional string feature_name = 1;

  // Input field names: must be included in DatasetConfig.input_fields.
  repeated string input_names = 2;

  // Kind of feature; IdFeature when not set.
  // Declared `optional` rather than `required`: a default value can only take
  // effect when the field may be omitted, and a proto2 `required` field can
  // never be relaxed later without old readers rejecting messages. The field
  // number and wire type are unchanged, so every message that sets this
  // field is still parsed identically.
  optional FeatureType feature_type = 3 [default = IdFeature];

  // Name of the embedding; empty by default.
  // NOTE(review): presumably features with the same embedding_name share one
  // embedding table -- confirm.
  optional string embedding_name = 4 [default = ""];

  // Embedding dimension; 0 by default.
  optional uint32 embedding_dim = 5 [default = 0];

  // Hash bucket size; 0 by default.
  optional uint64 hash_bucket_size = 6 [default = 0];

  // For categorical_column_with_identity.
  optional uint64 num_buckets = 7 [default = 0];

  // Only for raw features.
  // NOTE(review): presumably bucket boundaries used to discretize the raw
  // value -- confirm.
  repeated double boundaries = 8;

  // Separator within features.
  optional string separator = 9 [default = "|"];

  // Delimiter that separates a key from its value.
  optional string kv_separator = 10;

  // Delimiter that separates multiple values within a sequence.
  optional string seq_multi_sep = 101;

  // Truncate sequence data to max_seq_len.
  optional uint32 max_seq_len = 102;

  // Vocabulary, given either as a file or as an inline list.
  // NOTE(review): precedence when both are set is not visible here.
  optional string vocab_file = 11;
  repeated string vocab_list = 12;

  // Many other fields share this config.
  repeated string shared_names = 16;

  // Lookup max select element number, default 10.
  optional int32 lookup_max_sel_elem_num = 17 [default = 10];

  // Maximum number of partitions, default 1.
  optional int32 max_partitions = 18 [default = 1];

  // Combiner, default "sum".
  optional string combiner = 19 [default = "sum"];

  // Embedding initializer. Initializer is not declared in this file.
  // NOTE(review): presumably provided by one of the imported protos --
  // confirm which one.
  optional Initializer initializer = 20;

  // Number of digits kept after the decimal point when formatting a
  // float/double as a string; scientific format is not used.
  // With the default (-1), converting float/double to string is not allowed.
  optional int32 precision = 21 [default = -1];

  // Normalize raw feature to [0, 1]. Both bounds default to 0.0.
  optional double min_val = 212 [default = 0.0];
  optional double max_val = 213 [default = 0.0];

  // Normalization function for raw features, such as: tf.math.log1p.
  optional string normalizer_fn = 214;

  // Dimension of a raw feature that has multiple dimensions; default 1.
  optional uint32 raw_input_dim = 24 [default = 1];

  // Sequence feature combiner.
  optional SequenceCombiner sequence_combiner = 25;

  // Sub feature type for a sequence feature; IdFeature by default.
  optional FeatureType sub_feature_type = 26 [default = IdFeature];

  // Sequence length; default 1.
  optional uint32 sequence_length = 27 [default = 1];

  // For expr features.
  optional string expression = 30;

  // Embedding variable params.
  optional EVParams ev_params = 31;

  // For combo features:
  //   if not set, use cross_column;
  //   otherwise, the input features are first joined
  //   and then passed to categorical_column.
  optional string combo_join_sep = 401 [default = ""];

  // Separator for each input.
  // If not set, combo inputs will not be split.
  repeated string combo_input_seps = 402;
}
// Container for a list of FeatureConfig entries plus one setting that
// applies to embedding lookups.
message FeatureConfigV2 {
  // The individual feature configurations.
  repeated FeatureConfig features = 1;
  // Force placement of embedding lookup ops on cpu to improve
  // training and inference efficiency. Off by default.
  optional bool embedding_on_cpu = 2 [default = false];
}
// A named group of features.
message FeatureGroupConfig {
  // Name of the group.
  optional string group_name = 1;
  // Names of the features that belong to this group.
  // NOTE(review): presumably these must match FeatureConfig.feature_name
  // values -- confirm.
  repeated string feature_names = 2;
  // Wide/deep placement of the group; DEEP by default.
  optional WideOrDeep wide_deep = 3 [default = DEEP];
  // Sequence attention groups attached to this feature group.
  repeated SeqAttGroupConfig sequence_features = 4;
  // Off by default.
  // NOTE(review): presumably marks the group as coming from a negative
  // sampler -- confirm against the input pipeline.
  optional bool negative_sampler = 5 [default = false];
}
// One entry of SeqAttGroupConfig.seq_att_map; all three fields are lists of
// names.
// NOTE(review): presumably `key` names the target features and `hist_seq` /
// `aux_hist_seq` name the history sequences attended over -- confirm.
message SeqAttMap {
  repeated string key = 1;
  repeated string hist_seq = 2;
  repeated string aux_hist_seq = 3;
}
// Configuration of one sequence attention group; used by
// FeatureGroupConfig.sequence_features.
message SeqAttGroupConfig {
  // Name of the group.
  optional string group_name = 1;
  // Key / history-sequence mappings of this group.
  repeated SeqAttMap seq_att_map = 2;
  // Off by default.
  // NOTE(review): presumably enables TensorFlow summaries for this group --
  // confirm.
  optional bool tf_summary = 3 [default = false];
  // DNN is not declared in this file.
  // NOTE(review): presumably provided by one of the imported protos --
  // confirm which one.
  optional DNN seq_dnn = 4;
  // Off by default.
  optional bool allow_key_search = 5 [default = false];
  // On by default.
  optional bool need_key_feature = 6 [default = true];
  // Off by default.
  optional bool allow_key_transform = 7 [default = false];
  // Boolean switch (not a DNN message, unlike seq_dnn); off by default.
  optional bool transform_dnn = 8 [default = false];
}