torchaudio/csrc/ffmpeg/stream_processor.h (43 lines of code) (raw):
#pragma once
#include <torch/torch.h>
#include <torchaudio/csrc/ffmpeg/decoder.h>
#include <torchaudio/csrc/ffmpeg/ffmpeg.h>
#include <torchaudio/csrc/ffmpeg/sink.h>
#include <map>
namespace torchaudio {
namespace ffmpeg {
// Handles one source stream: decodes incoming packets and passes the decoded
// data to the registered sinks, each of which is addressed by a key.
class StreamProcessor {
 public:
  // Identifier returned by `add_stream` and accepted by the per-sink methods
  // (`remove_stream`, `get_filter_description`, `pop_chunk`).
  using KeyType = int;

 private:
  // Frame objects owned by this processor.
  // NOTE(review): presumably one receives decoder output and the other filter
  // output -- confirm against the implementation file.
  AVFramePtr pFrame1;
  AVFramePtr pFrame2;

  // Components for decoding source media

  // Time base of the decoder input; kept for debugging only.
  // Default-initialized so that it never holds an indeterminate value, even
  // when the constructor's initializer list does not mention it.
  double decoder_time_base = 0.0;
  Decoder decoder;

  // NOTE(review): presumably the next key `add_stream` will hand out --
  // confirm against the implementation file.
  KeyType current_key = 0;
  // Registered sinks, looked up by the key returned from `add_stream`.
  std::map<KeyType, Sink> sinks;

 public:
  // NOTE(review): presumably sets up `decoder` from `codecpar` -- confirm.
  // NOTE(review): this single-argument constructor is not `explicit`, so an
  // `AVCodecParameters*` converts implicitly to a StreamProcessor. Left as-is
  // to keep the existing interface; consider tightening once all callers have
  // been audited.
  StreamProcessor(AVCodecParameters* codecpar);
  ~StreamProcessor() = default;

  // Non-copyable
  StreamProcessor(const StreamProcessor&) = delete;
  StreamProcessor& operator=(const StreamProcessor&) = delete;

  // Movable
  StreamProcessor(StreamProcessor&&) = default;
  StreamProcessor& operator=(StreamProcessor&&) = default;

  //////////////////////////////////////////////////////////////////////////////
  // Configurations
  //////////////////////////////////////////////////////////////////////////////

  // Registers a new output stream:
  // 1. Initialize the decoder (if not initialized yet).
  // 2. Configure a new audio/video filter.
  //    If a custom filter description is provided, then perform resize,
  //    resample, etc.; otherwise the filter only converts the sample type.
  // 3. Configure a buffer.
  // 4. Return the filter ID (the key that identifies this output stream).
  KeyType add_stream(
      AVRational input_time_base,
      AVCodecParameters* codecpar,
      int frames_per_chunk,
      int num_chunks,
      std::string filter_description);

  // Removes the output stream identified by `key`.
  void remove_stream(KeyType key);

  //////////////////////////////////////////////////////////////////////////////
  // Query methods
  //////////////////////////////////////////////////////////////////////////////

  // Returns the filter description of the output stream identified by `key`.
  std::string get_filter_description(KeyType key) const;

  // NOTE(review): the exact readiness condition (any sink vs. all sinks) is
  // not visible in this header -- confirm against the implementation file.
  bool is_buffer_ready() const;

  //////////////////////////////////////////////////////////////////////////////
  // The streaming process
  //////////////////////////////////////////////////////////////////////////////

  // 1. Decode the input packet.
  // 2. Pass the decoded data to the filters.
  // 3. Each filter stores its result in the corresponding buffer.
  //    - Passing NULL drains (flushes) whatever is buffered internally.
  // NOTE(review): the meaning of the returned int is not visible here;
  // presumably a status code -- confirm against the implementation file.
  int process_packet(AVPacket* packet);

  // Flushes the internal buffer of the decoder.
  // To be used when seeking.
  void flush();

 private:
  // NOTE(review): presumably hands a decoded frame to the sinks and returns a
  // status code -- confirm against the implementation file.
  int send_frame(AVFrame* pFrame);

  //////////////////////////////////////////////////////////////////////////////
  // Retrieval
  //////////////////////////////////////////////////////////////////////////////
 public:
  // Gets the next chunk from the result of the filter identified by `key`.
  // NOTE(review): presumably returns an empty optional when no chunk is
  // available -- confirm against the implementation file.
  c10::optional<torch::Tensor> pop_chunk(KeyType key);
};
} // namespace ffmpeg
} // namespace torchaudio