in tensorflow_transform/tf_metadata/schema_utils.py [0:0]
def _sparse_feature_as_feature_spec(feature, feature_by_name, string_domains):
  """Returns a representation of a SparseFeature as a feature spec.

  Every index feature and the value feature named by `feature` is removed from
  `feature_by_name` (the mapping is mutated in place), so a feature can back at
  most one index or value reference. Entries removed before an error is raised
  are not restored.

  Args:
    feature: The sparse feature description. Must expose `name`,
      `index_feature` (an iterable of objects with a `name`),
      `value_feature.name` and `is_sorted`.
    feature_by_name: A mutable dict from feature name to feature. The index
      and value features referenced by `feature` are popped from it.
    string_domains: Passed through unchanged to `_get_domain` together with
      the value feature.

  Returns:
    A `(spec, domain)` pair where `spec` is a `tf.io.SparseFeature` built from
    the index keys, the value key, the value feature's dtype, the inferred
    dense shape and `feature.is_sorted`, and `domain` is the result of
    `_get_domain` for the value feature.

  Raises:
    ValueError: If a referenced index or value feature is not present in
      `feature_by_name` (it never existed, or it was already consumed by an
      earlier reference); if an index feature has an `int_domain` whose `min`
      is unset or non-zero, or whose `max` is unset; or if the only index
      feature has no `int_domain`.
  """

  def pop_referenced_feature(key, role):
    """Removes and returns `feature_by_name[key]`; `role` labels the error."""
    try:
      return feature_by_name.pop(key)
    except KeyError:
      # A missing key means either the feature was never in the schema or an
      # earlier index/value reference already popped it, so report both.
      raise ValueError(
          'sparse_feature "{}" referred to {} feature "{}" which did not '
          'exist in the schema or was referred to as an index or value '
          'multiple times.'.format(feature.name, role, key))

  index_keys = [index_feature.name for index_feature in feature.index_feature]
  index_features = [pop_referenced_feature(key, 'index') for key in index_keys]
  value_key = feature.value_feature.name
  value_feature = pop_referenced_feature(value_key, 'value')

  # Build the dense shape with exactly one dimension per index key; every
  # iteration either appends a dimension or raises, so the rank always equals
  # len(index_keys).
  shape = []
  for index_feature, index_key in zip(index_features, index_keys):
    if index_feature.HasField('int_domain'):
      int_domain = index_feature.int_domain
      # Currently we only handle 0-based INT index features whose minimum
      # domain value must be zero.
      if not int_domain.HasField('min'):
        raise ValueError('Cannot determine dense shape of sparse feature '
                         '"{}". The minimum domain value of index feature "{}"'
                         ' is not set.'.format(feature.name, index_key))
      if int_domain.min != 0:
        raise ValueError('Only 0-based index features are supported. Sparse '
                         'feature "{}" has index feature "{}" whose minimum '
                         'domain value is {}.'.format(
                             feature.name, index_key, int_domain.min))
      if not int_domain.HasField('max'):
        raise ValueError('Cannot determine dense shape of sparse feature '
                         '"{}". The maximum domain value of index feature "{}"'
                         ' is not set.'.format(feature.name, index_key))
      # Indices range over [0, max], hence max + 1 entries in this dimension.
      shape.append(int_domain.max + 1)
    elif len(index_keys) == 1:
      # With a single index feature there is no fallback: its size is required.
      raise ValueError('Cannot determine dense shape of sparse feature "{}".'
                       ' The index feature "{}" had no int_domain set.'.format(
                           feature.name, index_key))
    else:
      # With several index features, a dimension without an int_domain is
      # recorded as -1.
      shape.append(-1)

  dtype = _feature_dtype(value_feature)
  spec = tf.io.SparseFeature(index_keys, value_key, dtype, shape,
                             feature.is_sorted)
  domain = _get_domain(value_feature, string_domains)
  return spec, domain