def ngrams()

in tensorflow_text/python/ops/ngrams_op.py [0:0]


def ngrams(data,
           width,
           axis=-1,
           reduction_type=None,
           string_separator=" ",
           name=None):
  """Create a tensor of n-grams based on the input data `data`.

  Creates a tensor of n-grams based on `data`. The n-grams are of width `width`
  and are created along axis `axis`; the n-grams are created by combining
  windows of `width` adjacent elements from `data` using `reduction_type`. This
  op is intended to cover basic use cases; more complex combinations can be
  created using the sliding_window op.

  >>> input_data = tf.ragged.constant([["e", "f", "g"], ["dd", "ee"]])
  >>> ngrams(
  ...   input_data,
  ...   width=2,
  ...   axis=-1,
  ...   reduction_type=Reduction.STRING_JOIN,
  ...   string_separator="|")
  <tf.RaggedTensor [[b'e|f', b'f|g'], [b'dd|ee']]>

  Args:
    data: The data to reduce.
    width: The width of the ngram window. If there is not sufficient data to
      fill out the ngram window, the resulting ngram will be empty.
    axis: The axis to create ngrams along. Note that for string join reductions,
      only axis '-1' is supported; for other reductions, any positive or
      negative axis can be used. Should be a constant.
    reduction_type: A member of the Reduction enum. Should be a constant.
      Currently supports:

      * `Reduction.SUM`: Add values in the window.
      * `Reduction.MEAN`: Average values in the window.
      * `Reduction.STRING_JOIN`: Join strings in the window.
        Note that axis must be -1 here.

    string_separator: The separator string used for `Reduction.STRING_JOIN`.
      Ignored otherwise. Must be a string constant, not a Tensor.
    name: The op name.

  Returns:
    A tensor of ngrams. If the input is a tf.Tensor, the output will also
      be a tf.Tensor; if the input is a tf.RaggedTensor, the output will be
      a tf.RaggedTensor.

  Raises:
    InvalidArgumentError: if `reduction_type` is either None or not a Reduction,
      or if `reduction_type` is STRING_JOIN and `axis` is not -1.
  """

  with ops.name_scope(name, "NGrams", [data, width]):
    if reduction_type is None:
      raise errors.InvalidArgumentError(None, None,
                                        "reduction_type must be specified.")

    if not isinstance(reduction_type, Reduction):
      raise errors.InvalidArgumentError(None, None,
                                        "reduction_type must be a Reduction.")

    # TODO(b/122967921): Lift this restriction after ragged_reduce_join is done.
    if reduction_type is Reduction.STRING_JOIN and axis != -1:
      raise errors.InvalidArgumentError(
          None, None, "%s requires that ngrams' 'axis' parameter be -1." %
          Reduction.STRING_JOIN.name)

    windowed_data = sliding_window(data, width, axis)

    if axis < 0:
      reduction_axis = axis
    else:
      reduction_axis = axis + 1

    # Ragged reduction ops work on both Tensor and RaggedTensor, so we can
    # use them here regardless of the type of tensor in 'windowed_data'.
    if reduction_type is Reduction.SUM:
      return math_ops.reduce_sum(windowed_data, reduction_axis)
    elif reduction_type is Reduction.MEAN:
      return math_ops.reduce_mean(windowed_data, reduction_axis)
    elif reduction_type is Reduction.STRING_JOIN:
      if isinstance(data, ragged_tensor.RaggedTensor):
        return ragged_functional_ops.map_flat_values(
            string_ops.reduce_join,
            windowed_data,
            axis=axis,
            separator=string_separator)
      else:
        return string_ops.reduce_join(
            windowed_data, axis=axis, separator=string_separator)