in tensorflow_transform/coders/example_proto_coder.py [0:0]
def __init__(self, schema, serialized=True):
  """Initializes the coder and its per-feature handlers from a schema.

  One handler is registered for every `tf.train.Example` feature key implied
  by the feature spec derived from `schema`:

    * `tf.io.FixedLenFeature`: a `_FixedLenFeatureHandler` for the feature.
    * `tf.io.VarLenFeature`: a `_VarLenFeatureHandler` for the feature.
    * `tf.io.SparseFeature`: an int64 `_VarLenFeatureHandler` per index key,
      followed by a `_VarLenFeatureHandler` for the value key.
    * Ragged feature: an int64 `_VarLenFeatureHandler` per `RowLengths`
      partition key, followed by a `_VarLenFeatureHandler` for the value key.
      `UniformRowLength` partitions get no handler.

  Args:
    schema: A `Schema` proto.
    serialized: Whether to encode serialized Example protos (as opposed to
      in-memory Example protos).

  Raises:
    ValueError: If `schema` is invalid: a feature spec has an unsupported
      type, a ragged feature has a partition that is neither `RowLengths` nor
      `UniformRowLength`, or a `RowLengths` partition follows a
      `UniformRowLength` one.
  """
  self._schema = schema
  self._serialized = serialized

  # A single tf.train.Example and the handler objects are allocated up front
  # for performance reasons.
  #
  # Rationale carried over from the original implementation: "encode" yields
  # a deep (not shallow) transformation and the schema always fully defines
  # the Example's FeatureMap (every field is cleared/assigned or copied), so
  # this optimization is considered correct and thread-compatible.
  example_cache = tf.train.Example()
  self._encode_example_cache = example_cache
  # Bind the attribute to the list before filling it so that both names
  # refer to the same object throughout.
  handlers = self._feature_handlers = []
  register = handlers.append

  feature_specs = schema_utils.schema_as_feature_spec(schema).feature_spec
  for feature_name, spec in feature_specs.items():
    if isinstance(spec, tf.io.FixedLenFeature):
      register(_FixedLenFeatureHandler(feature_name, spec))
      continue

    if isinstance(spec, tf.io.VarLenFeature):
      register(_VarLenFeatureHandler(feature_name, spec.dtype))
      continue

    if isinstance(spec, tf.io.SparseFeature):
      # `index_key` is either a single key or a list of keys; normalize to a
      # list so both cases share one code path.
      sparse_index_keys = spec.index_key
      if not isinstance(sparse_index_keys, list):
        sparse_index_keys = [sparse_index_keys]
      for sparse_index_key in sparse_index_keys:
        register(_VarLenFeatureHandler(sparse_index_key, tf.int64))
      register(_VarLenFeatureHandler(spec.value_key, spec.dtype))
      continue

    if not common_types.is_ragged_feature(spec):
      raise ValueError('feature_spec should be one of tf.io.FixedLenFeature, '
                       'tf.io.VarLenFeature, tf.io.SparseFeature or '
                       'tf.io.RaggedFeature: "{}" was {}'.format(
                           feature_name, type(spec)))

    # Ragged feature: partitions first, values last.
    seen_uniform = False
    for part in spec.partitions:
      if isinstance(part, tf.io.RaggedFeature.RowLengths):
        # Once a uniform partition has been seen, no ragged one may follow.
        if seen_uniform:
          raise ValueError(
              'Encountered ragged dimension after uniform for feature '
              '"{}": only inner dimensions can be uniform. Feature spec '
              'is {}'.format(feature_name, spec))
        register(_VarLenFeatureHandler(part.key, tf.int64))
        continue

      if not isinstance(part, tf.io.RaggedFeature.UniformRowLength):
        raise ValueError(
            'Only `RowLengths` and `UniformRowLength` partitions of ragged '
            'features are supported, got {}'.format(type(part)))

      # Uniform partitions are not encoded since they can be recovered from
      # the shape information.
      seen_uniform = True
    register(_VarLenFeatureHandler(spec.value_key, spec.dtype))

  # Let every handler set itself up against the shared pre-allocated Example.
  for handler in handlers:
    handler.initialize_encode_cache(example_cache)