in tzrec/tools/convert_easyrec_config_to_tzrec_config.py [0:0]
def _create_feature_config_no_fg(self, pipeline_config):
    """Create tzrec feature configs from easyrec feature configs (no fg json).

    Every entry of ``self.easyrec_config.feature_configs`` is converted to the
    matching tzrec feature message, wrapped in a tzrec ``FeatureConfig`` and
    appended to ``pipeline_config.feature_configs``. Each easyrec input name
    is referenced in the tzrec expression as ``user:<input_name>``.

    Entries that cannot be converted (unsupported feature type, or too few
    ``input_names``) are reported with ``logger.error`` and skipped, so one
    bad entry does not abort the conversion of the remaining ones.

    Args:
        pipeline_config: tzrec pipeline config whose ``feature_configs``
            receives the converted features; it is modified in place.

    Returns:
        The ``pipeline_config`` object that was passed in.
    """
    # Instantiated only to reach the easyrec feature type enum values.
    easyrec_feature_config = easyrec_feature_config_pb2.FeatureConfig()  # NOQA
    for cfg in self.easyrec_config.feature_configs:
        input_names = cfg.input_names
        if not input_names:
            # Every conversion below reads input_names[0]; indexing an empty
            # list would raise IndexError and abort the whole conversion.
            logger.error(
                f"{cfg.feature_name} has no input_names, can't be converted"
            )
            continue

        feature_name = cfg.feature_name or input_names[0]
        feature_type = cfg.feature_type
        first_input = f"user:{input_names[0]}"
        # `feature` stays None when the entry can't be converted;
        # `oneof_field` names the FeatureConfig field the feature is put in.
        feature = None
        oneof_field = None

        if feature_type in (
            easyrec_feature_config.IdFeature,
            easyrec_feature_config.TagFeature,
        ):
            # Both easyrec id and tag features map to a tzrec IdFeature.
            feature = tzrec_feature_pb2.IdFeature()
            oneof_field = "id_feature"
            feature.feature_name = feature_name
            feature.expression = first_input
            feature.embedding_dim = cfg.embedding_dim
            # Copy only the bucketing option that is actually configured, as
            # done for sequence id features below, so a `num_buckets` setting
            # is not replaced by an explicit `hash_bucket_size` of 0.
            if cfg.HasField("hash_bucket_size"):
                feature.hash_bucket_size = cfg.hash_bucket_size
            if cfg.HasField("num_buckets"):
                feature.num_buckets = cfg.num_buckets
            if (
                feature_type == easyrec_feature_config.TagFeature
                and cfg.HasField("kv_separator")
            ):
                # A tag feature with a kv separator is marked as weighted.
                feature.weighted = True
        elif feature_type == easyrec_feature_config.SequenceFeature:
            if cfg.sub_feature_type == easyrec_feature_config.RawFeature:
                feature = tzrec_feature_pb2.SequenceRawFeature()
                oneof_field = "sequence_raw_feature"
                if cfg.boundaries:
                    feature.boundaries.extend(cfg.boundaries)
            else:
                feature = tzrec_feature_pb2.SequenceIdFeature()
                oneof_field = "sequence_id_feature"
                if cfg.HasField("hash_bucket_size"):
                    feature.hash_bucket_size = cfg.hash_bucket_size
                if cfg.HasField("num_buckets"):
                    feature.num_buckets = cfg.num_buckets
            feature.feature_name = feature_name
            feature.expression = first_input
            feature.sequence_length = cfg.sequence_length
            feature.sequence_delim = cfg.separator
            feature.embedding_dim = cfg.embedding_dim
            if cfg.sequence_length <= 1:
                # Only reported; the feature is still converted and appended.
                logger.error(f"{feature_name} sequence_length is invalid !!!")
        elif feature_type == easyrec_feature_config.RawFeature:
            feature = tzrec_feature_pb2.RawFeature()
            oneof_field = "raw_feature"
            feature.feature_name = feature_name
            feature.expression = first_input
            if cfg.HasField("embedding_dim"):
                feature.embedding_dim = cfg.embedding_dim
            if cfg.boundaries:
                feature.boundaries.extend(cfg.boundaries)
        elif feature_type == easyrec_feature_config.ComboFeature:
            feature = tzrec_feature_pb2.ComboFeature()
            oneof_field = "combo_feature"
            feature.feature_name = feature_name
            # A combo feature crosses all of its inputs.
            feature.expression.extend(f"user:{name}" for name in input_names)
            feature.embedding_dim = cfg.embedding_dim
            feature.hash_bucket_size = cfg.hash_bucket_size
        elif feature_type == easyrec_feature_config.LookupFeature:
            if len(input_names) < 2:
                # The map and the key come from two separate inputs.
                logger.error(
                    f"{feature_name} needs 2 input_names (map, key), "
                    f"got {len(input_names)}"
                )
            else:
                feature = tzrec_feature_pb2.LookupFeature()
                oneof_field = "lookup_feature"
                feature.feature_name = feature_name
                feature.map = first_input
                feature.key = f"user:{input_names[1]}"
                if cfg.HasField("embedding_dim"):
                    feature.embedding_dim = cfg.embedding_dim
                if cfg.boundaries:
                    feature.boundaries.extend(cfg.boundaries)
        else:
            logger.error(f"{feature_name} can't converted")

        if feature is None:
            continue

        # A new FeatureConfig has nothing set, so the converted feature can
        # be copied straight into its field.
        feature_config = tzrec_feature_pb2.FeatureConfig()
        getattr(feature_config, oneof_field).CopyFrom(feature)
        logger.info(f"{feature_name} converted succeeded")
        pipeline_config.feature_configs.append(feature_config)
    return pipeline_config