in torchaudio/csrc/ffmpeg/buffer.cpp [104:157]
// Appends the frames of `t` (counted along dimension 0) to the buffer.
//
// - frames_per_chunk < 0: `t` is stored as-is as one new chunk and nothing is
//   ever trimmed.
// - otherwise: the incoming frames are re-sliced so that no chunk pushed here
//   holds more than `frames_per_chunk` frames, and afterwards the oldest
//   chunks are dropped until at most `num_chunks * frames_per_chunk` frames
//   remain buffered.
void AudioBuffer::push_tensor(torch::Tensor t) {
  // If frames_per_chunk < 0, users want to fetch all frames.
  // Just push back to chunks and that's it.
  if (frames_per_chunk < 0) {
    chunks.push_back(t);
    num_buffered_frames += t.size(0);
    return;
  }

  // Push
  // Note:
  // For audio, the incoming tensor contains multiple samples.
  // For a small `frames_per_chunk` value, it might be more than `max_frames`.
  // If we pushed the tensor as-is, the whole tensor might be popped at the
  // trimming stage, leaving the buffer always empty. So we slice the incoming
  // tensor and push it piece by piece.

  // NOTE(review): the modulo below assumes frames_per_chunk != 0; presumably
  // that is validated where the buffer is constructed -- confirm.
  //
  // If the number of buffered frames is not a multiple of frames_per_chunk,
  // the last inserted chunk is treated as partially filled: take it back out
  // and reprocess it together with the incoming tensor.
  if (num_buffered_frames % frames_per_chunk) {
    torch::Tensor prev = chunks.back();
    chunks.pop_back();
    num_buffered_frames -= prev.size(0);
    t = torch::cat({prev, t}, 0);
  }

  while (true) {
    // Tensor sizes are 64-bit; keep the full width instead of narrowing to
    // `int`.
    const int64_t num_input_frames = t.size(0);
    if (num_input_frames <= frames_per_chunk) {
      // What is left fits in a single chunk (it may be a partial one).
      chunks.push_back(t);
      num_buffered_frames += num_input_frames;
      break;
    }
    // The input tensor contains more frames than frames_per_chunk:
    // peel off the first `frames_per_chunk` frames and keep going with the
    // remainder. (The split at `num_input_frames` only yields a trailing
    // empty piece, which is ignored.)
    auto splits = torch::tensor_split(t, {frames_per_chunk, num_input_frames});
    chunks.push_back(splits[0]);
    num_buffered_frames += frames_per_chunk;
    t = splits[1];
  }

  // Trim
  // If frames_per_chunk > 0, we only retain the following number of frames
  // and discard older frames.
  // The product is evaluated in 64-bit so it cannot overflow a narrower type
  // before the comparison.
  const int64_t max_frames =
      static_cast<int64_t>(num_chunks) * frames_per_chunk;
  while (num_buffered_frames > max_frames) {
    TORCH_WARN_ONCE(
        "The number of buffered frames exceeded the buffer size. "
        "Dropping the old frames. "
        "To avoid this, you can set a higher buffer_chunk_size value.");
    // Read the size straight from the front chunk; no local alias, so the
    // parameter `t` is not shadowed.
    num_buffered_frames -= chunks.front().size(0);
    chunks.pop_front();
  }
}