in tensorflow_transform/mappers.py [0:0]
def _assign_buckets(x_values: tf.Tensor,
                    bucket_boundaries: tf.Tensor) -> tf.Tensor:
  """Maps each element of `x_values` to the index of the bucket it falls in.

  Args:
    x_values: a `Tensor` of dtype float32, expected to have at most one
      dimension. An input whose static rank is greater than one is squeezed
      before being bucketized.
    bucket_boundaries: The sorted bucket boundaries, given as a rank 2
      `Tensor`. The size of its second dimension is taken as the number of
      boundaries.

  Returns:
    A `Tensor` of bucket indices, one per element of `x_values` and intended
      to have the same shape as `x_values`. Each index is in the range
      [0, num_boundaries], where num_boundaries is the size of the second
      dimension of `bucket_boundaries`.
  """
  with tf.compat.v1.name_scope(None, 'assign_buckets'):
    num_boundaries = tf.cast(
        tf.shape(input=bucket_boundaries)[1], dtype=tf.int64)

    # BoostedTreesBucketize assigns to the lower bound rather than the upper
    # bound. Negating the boundaries (and, below, the values) and flipping the
    # boundary order keeps values that sit exactly on a boundary consistent.
    flipped_boundaries = tf.reverse(-bucket_boundaries, [-1])
    # The bucketize op also needs a final max boundary; the largest negated
    # input value is used for it.
    final_boundary = tf.reduce_max(-x_values)
    extended_boundaries = tf.concat(
        [flipped_boundaries, [[final_boundary]]], axis=-1)

    values = x_values
    if values.get_shape().ndims > 1:
      values = tf.squeeze(values)

    flipped_buckets = tf_utils.apply_bucketize_op(-values, extended_boundaries)
    # Flipping the inputs flips the bucket order as well, so count back from
    # the number of boundaries to recover the original index.
    buckets = tf.subtract(num_boundaries, flipped_buckets)

    bucket_rank = buckets.shape.ndims
    if bucket_rank <= 1:
      # The squeeze above may have left too few bucket dimensions. Add a
      # trailing one for scalar results, or when the squeezed values still
      # report a known rank above one, so the output shape matches the input.
      values_rank = values.shape.ndims
      if not bucket_rank or (values_rank is not None and values_rank > 1):
        buckets = tf.expand_dims(buckets, -1)
  return buckets