void AudioBuffer::push_tensor()

in torchaudio/csrc/ffmpeg/buffer.cpp [104:157]


// Append the frames held in `t` (frames are counted along dim 0) to the
// buffer.
//
// - frames_per_chunk < 0: `t` is stored as-is as one more chunk; nothing is
//   re-chunked or dropped.
// - otherwise: the incoming frames are re-sliced so that every stored chunk
//   holds exactly `frames_per_chunk` frames, except possibly the last one,
//   which may be shorter. Afterwards the oldest chunks are dropped until at
//   most `num_chunks * frames_per_chunk` frames remain buffered.
//
// `num_buffered_frames` is kept equal to the total number of frames across
// `chunks` on every path.
//
// NOTE(review): frames_per_chunk == 0 would divide by zero in the modulo
// below; presumably it is rejected where the buffer is configured -- confirm.
void AudioBuffer::push_tensor(torch::Tensor t) {
  // If frames_per_chunk < 0, users want to fetch all frames.
  // Just push back to chunks and that's it.
  if (frames_per_chunk < 0) {
    chunks.push_back(t);
    num_buffered_frames += t.size(0);
    return;
  }

  // Push
  // Note:
  // For audio, the incoming tensor contains multiple samples.
  // For a small `frames_per_chunk` value, that might be more than
  // `max_frames`. If we pushed the tensor as-is, the whole tensor might be
  // popped at the trimming stage, leaving the buffer always empty. So we
  // slice the incoming tensor and push the slices.

  // If the last inserted chunk is shorter than `frames_per_chunk`, take it
  // back out and re-process it together with the incoming tensor so that it
  // gets filled up first. The emptiness check protects `chunks.back()` in case
  // the counter and the container ever disagree.
  if (!chunks.empty() && num_buffered_frames % frames_per_chunk) {
    torch::Tensor prev = chunks.back();
    chunks.pop_back();
    num_buffered_frames -= prev.size(0);
    t = torch::cat({prev, t}, 0);
  }

  while (true) {
    // Keep the native width of Tensor::size() instead of narrowing to int.
    const auto num_input_frames = t.size(0);
    if (num_input_frames <= frames_per_chunk) {
      // The remainder fits in one (possibly partial) chunk.
      chunks.push_back(t);
      num_buffered_frames += num_input_frames;
      break;
    }
    // The input tensor contains more frames than frames_per_chunk:
    // peel off one full chunk and continue with the rest. Both slices are
    // views along dim 0, covering the same ranges the previous
    // tensor_split call produced, minus its unused empty third piece.
    chunks.push_back(t.slice(0, 0, frames_per_chunk));
    num_buffered_frames += frames_per_chunk;
    t = t.slice(0, frames_per_chunk, num_input_frames);
  }

  // Trim
  // If frames_per_chunk > 0, we only retain the following number of frames
  // and discard older frames (from the front of `chunks`).
  const int max_frames = num_chunks * frames_per_chunk;
  while (num_buffered_frames > max_frames) {
    TORCH_WARN_ONCE(
        "The number of buffered frames exceeded the buffer size. "
        "Dropping the old frames. "
        "To avoid this, you can set a higher buffer_chunk_size value.");
    num_buffered_frames -= chunks.front().size(0);
    chunks.pop_front();
  }
}