def _assign_buckets()

in tensorflow_transform/mappers.py [0:0]


def _assign_buckets(x_values: tf.Tensor,
                    bucket_boundaries: tf.Tensor) -> tf.Tensor:
  """Assigns every value in x to a bucket index defined by bucket_boundaries.

  Args:
    x_values: a `Tensor` of dtype float32 with no more than one dimension. If
      its statically known rank is greater than one it is squeezed before
      bucketizing.
    bucket_boundaries:  The bucket boundaries represented as a rank 2 `Tensor`.
      Should be sorted.

  Returns:
    A `Tensor` of the same shape as `x_values`, with each element in the
    returned tensor representing the bucketized value. Bucketized value is
    in the range [0, len(bucket_boundaries)]. A rank 0 result is promoted to
    shape [1], and when a rank > 1 input was squeezed down to rank 1 a single
    trailing axis is added back.
  """
  with tf.compat.v1.name_scope(None, 'assign_buckets'):
    # Number of boundaries (size of axis 1); used below to flip the reversed
    # bucket indices back to their original order.
    max_value = tf.cast(tf.shape(input=bucket_boundaries)[1], dtype=tf.int64)

    # Static rank of the caller's tensor (None when unknown). It is captured
    # here because `x_values` may be rebound to its squeezed version below,
    # and the output-shape fix-up at the end must consult the input's rank.
    input_rank = x_values.shape.ndims

    # We need to reverse the negated boundaries and x_values and add a final
    # max boundary to work with the new bucketize op.
    bucket_boundaries = tf.reverse(-bucket_boundaries, [-1])
    bucket_boundaries = tf.concat([
        bucket_boundaries, [[tf.reduce_max(-x_values)]]], axis=-1)

    # Comparing a `None` rank against an int raises TypeError, so only squeeze
    # when the rank is statically known to exceed one.
    if input_rank is not None and input_rank > 1:
      x_values = tf.squeeze(x_values)

    # BoostedTreesBucketize assigns to lower bound instead of upper bound, so
    # we need to reverse both boundaries and x_values and make them negative
    # to make cases exactly at the boundary consistent.
    buckets = tf_utils.apply_bucketize_op(-x_values, bucket_boundaries)
    # After reversing the inputs, the assigned buckets are exactly reversed
    # and need to be re-reversed to their original index.
    buckets = tf.subtract(max_value, buckets)

    bucket_rank = buckets.shape.ndims
    if bucket_rank is not None and bucket_rank <= 1:
      # As a result of the above squeeze, there might be too few bucket dims
      # and we want the output shape to match the input.
      if not bucket_rank:
        # Scalar result: always promote to a rank 1 tensor.
        buckets = tf.expand_dims(buckets, -1)
      elif input_rank is not None and input_rank > 1:
        # Rank 1 result from a squeezed higher-rank input: restore one
        # trailing axis.
        buckets = tf.expand_dims(buckets, -1)

    return buckets