def sliding_window()

in tensorflow_text/python/ops/sliding_window_op.py [0:0]


def sliding_window(data, width, axis=-1, name=None):
  """Extracts every length-`width` window of `data` along dimension `axis`.

  The result has one more dimension than `data`.  Position `a` along `axis`
  holds the `width` consecutive elements of `data` that begin at position `a`;
  consecutive windows start one position apart.  I.e.:

  * `result.shape.ndims = data.shape.ndims + 1`
  * `result[i1..iaxis, a] = data[i1..iaxis, a:a+width]`
    (where `0 <= a < data[i1...iaxis].shape[0] - (width - 1)`).

  Because only complete windows are produced, each result row (along
  dimension `axis`) has `width - 1` fewer items than the corresponding `data`
  row, and a `data` row with fewer than `width` items yields an empty `result`
  row.  To get `result` rows that are the same size as the `data` rows, use
  `pad_along_dimension` to add `width - 1` padding elements before calling
  this op.

  #### Examples:

  Sliding window (width=3) across a sequence of tokens:

  >>> # input: <string>[sequence_length]
  >>> input = tf.constant(["one", "two", "three", "four", "five", "six"])
  >>> # output: <string>[sequence_length-2, 3]
  >>> sliding_window(data=input, width=3, axis=0)
  <tf.Tensor: shape=(4, 3), dtype=string, numpy=
      array([[b'one', b'two', b'three'],
             [b'two', b'three', b'four'],
             [b'three', b'four', b'five'],
             [b'four', b'five', b'six']], dtype=object)>

  Sliding window (width=2) across the inner dimension of a ragged matrix
  containing a batch of token sequences:

  >>> # input: <string>[num_sentences, (num_words)]
  >>> input = tf.ragged.constant(
  ...     [['Up', 'high', 'in', 'the', 'air'],
  ...      ['Down', 'under', 'water'],
  ...      ['Away', 'to', 'outer', 'space']])
  >>> # output: <string>[num_sentences, (num_words-1), 2]
  >>> sliding_window(input, width=2, axis=-1)
  <tf.RaggedTensor [[[b'Up', b'high'], [b'high', b'in'], [b'in', b'the'],
                     [b'the', b'air']], [[b'Down', b'under'],
                     [b'under', b'water']],
                    [[b'Away', b'to'], [b'to', b'outer'],
                     [b'outer', b'space']]]>

  Sliding window across the second dimension of a 3-D tensor containing
  batches of sequences of embedding vectors:

  >>> # input: <int32>[num_sequences, sequence_length, embedding_size]
  >>> input = tf.constant([
  ...     [[1, 1, 1], [2, 2, 1], [3, 3, 1], [4, 4, 1], [5, 5, 1]],
  ...     [[1, 1, 2], [2, 2, 2], [3, 3, 2], [4, 4, 2], [5, 5, 2]]])
  >>> # output: <int32>[num_sequences, sequence_length-1, 2, embedding_size]
  >>> sliding_window(data=input, width=2, axis=1)
  <tf.Tensor: shape=(2, 4, 2, 3), dtype=int32, numpy=
      array([[[[1, 1, 1],
               [2, 2, 1]],
              [[2, 2, 1],
               [3, 3, 1]],
              [[3, 3, 1],
               [4, 4, 1]],
              [[4, 4, 1],
               [5, 5, 1]]],
             [[[1, 1, 2],
               [2, 2, 2]],
              [[2, 2, 2],
               [3, 3, 2]],
              [[3, 3, 2],
               [4, 4, 2]],
              [[4, 4, 2],
               [5, 5, 2]]]], dtype=int32)>

  Args:
    data: `<dtype> [O1...ON, A, I1...IM]`
      A potentially ragged K-dimensional tensor with outer dimensions of size
      `O1...ON`; axis dimension of size `A`; and inner dimensions of size
      `I1...IM`.  I.e. `K = N + 1 + M`, where `N>=0` and `M>=0`.

    width: A Python `int` giving the number of elements in each window.  Must
      be greater than zero.

    axis: A Python `int` selecting the dimension that the window slides
      along.  Negative values from `-K` to `-1` count from the last dimension.

    name: The name for this op (optional).

  Returns:
    A `K+1` dimensional tensor with the same dtype as `data`, where:

    * `result[i1..iaxis, a]` = `data[i1..iaxis, a:a+width]`
    * `result.shape[:axis]` = `data.shape[:axis]`
    * `result.shape[axis]` = `data.shape[axis] - (width - 1)`
    * `result.shape[axis + 1]` = `width`
    * `result.shape[axis + 2:]` = `data.shape[axis + 1:]`

  Raises:
    TypeError: If `axis` or `width` is not an `int`.
    InvalidArgumentError: If the rank of `data` is statically known and `axis`
      lies outside `[-K, K)`, or if `width` is not greater than zero.
  """
  with ops.name_scope(name, "SlidingWindow", [data, axis]):
    data = ragged_tensor.convert_to_tensor_or_ragged_tensor(data, name="data")

    # Type checks come first so that the comparisons below never see a
    # non-integer value.
    if not isinstance(axis, int):
      raise TypeError("axis must be an int")
    if not isinstance(width, int):
      raise TypeError("width must be an int")

    # The axis range can only be verified when the rank is statically known.
    rank = data.shape.ndims
    if rank is not None and not -rank <= axis < rank:
      raise errors.InvalidArgumentError(
          None, None, "axis must be between -k <= axis <= -1 OR 0 <= axis < k")
    if width <= 0:
      raise errors.InvalidArgumentError(
          None, None, "width must be an integer greater than 0")

    # Work out the parts of the index expression that surround the slice over
    # `axis`, plus the position of the new window dimension, which must land
    # immediately after `axis`.  A non-negative axis is addressed from the
    # front; a negative axis is addressed from the back via an Ellipsis.
    if axis >= 0:
      before = [slice(None)] * axis
      after = []
      window_axis = axis + 1
    else:
      before = [Ellipsis]
      after = [slice(None)] * (-axis - 1)
      window_axis = axis

    # The k-th shifted copy drops the first `k` and the last `width - 1 - k`
    # elements along `axis`, so all copies have equal length and copy `k`
    # supplies element `k` of every window.  The final copy drops nothing from
    # the end, which needs an open-ended slice (`None`) rather than `0`.
    shifted = [
        data[before + [slice(offset, (offset - width + 1) or None)] + after]
        for offset in range(width)
    ]

    # Stacking the shifted copies creates the window dimension.
    return array_ops.stack(shifted, window_axis)