// torchaudio/csrc/ffmpeg/ffmpeg.cpp (156 lines of code) (raw):
#include <torchaudio/csrc/ffmpeg/ffmpeg.h>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
namespace torchaudio {
namespace ffmpeg {
////////////////////////////////////////////////////////////////////////////////
// AVFormatContext
////////////////////////////////////////////////////////////////////////////////
void AVFormatContextDeleter::operator()(AVFormatContext* p) {
avformat_close_input(&p);
};
namespace {
// Converts a string-to-string map into a freshly built AVDictionary.
//
// Every (key, value) pair of `option` is inserted with flags == 0. The return
// value of av_dict_set is not inspected. When `option` is empty nothing is
// inserted and nullptr is returned.
//
// The returned dictionary is handed to the caller as a raw pointer; nothing in
// this function releases it.
AVDictionary* get_option_dict(
    const std::map<std::string, std::string>& option) {
  AVDictionary* dict = nullptr;
  for (const auto& entry : option) {
    const char* key = entry.first.c_str();
    const char* value = entry.second.c_str();
    av_dict_set(&dict, key, value, 0);
  }
  return dict;
}
// Collects the keys still present in `p`, then frees the dictionary.
//
// Callers pass the dictionary that an FFmpeg "open" call left behind, so the
// returned keys are the options that were not consumed. The key strings are
// copied into the result before av_dict_free runs, so the returned vector does
// not refer to the freed dictionary. `p` must not be used after this call.
std::vector<std::string> clean_up_dict(AVDictionary* p) {
  std::vector<std::string> remaining_keys;
  // An empty key combined with AV_DICT_IGNORE_SUFFIX is used to walk every
  // entry; the previous entry is fed back in to continue the iteration.
  const AVDictionaryEntry* entry = nullptr;
  while ((entry = av_dict_get(p, "", entry, AV_DICT_IGNORE_SUFFIX)) != nullptr) {
    remaining_keys.emplace_back(entry->key);
  }
  av_dict_free(&p);
  return remaining_keys;
}
// Formats `vars` as a comma-separated list of double-quoted items, e.g.
// {"a", "b"} -> "a", "b" (quotes included). An empty vector yields "".
//
// The items are written verbatim: quotes inside an item are not escaped.
// The vector is taken by const reference so that callers do not pay for a
// copy of every string just to build a message.
std::string join(const std::vector<std::string>& vars) {
  std::string joined;
  const char* separator = "";
  for (const auto& item : vars) {
    joined += separator;
    joined += '"';
    joined += item;
    joined += '"';
    // Only the first item is emitted without a leading separator.
    separator = ", ";
  }
  return joined;
}
// Opens `src` with avformat_open_input and returns the resulting context.
//
// Parameters:
//   src    - passed to avformat_open_input as the input URL/path.
//   device - name of the input format to force; when empty, no format is
//            forced (a null format is passed to avformat_open_input).
//   option - key/value options forwarded to avformat_open_input.
//
// Returns the opened AVFormatContext as a raw pointer; the caller takes
// ownership of it.
//
// Throws std::runtime_error when
//   * `device` is non-empty but names no known input format,
//   * the input cannot be opened, or
//   * the input opened but some entries of `option` were not consumed.
// No FFmpeg resource allocated here is left alive when an exception is thrown.
AVFormatContext* get_format_context(
    const std::string& src,
    const std::string& device,
    const std::map<std::string, std::string>& option) {
  // Resolve the forced input format before allocating anything, so that the
  // early throw below cannot leak the option dictionary.
  AVInputFormat* pInput = nullptr;
  if (!device.empty()) {
    pInput = av_find_input_format(device.c_str());
    if (!pInput) {
      // Falling through with a null format would silently ignore the caller's
      // explicit request and let FFmpeg probe the format instead.
      throw std::runtime_error("Unsupported device: \"" + device + "\"");
    }
  }

  AVFormatContext* pFormat = nullptr;
  AVDictionary* opt = get_option_dict(option);
  const int ret = avformat_open_input(&pFormat, src.c_str(), pInput, &opt);

  // Always drain the dictionary: this frees it on every path and yields the
  // options that were left over.
  const auto unused_keys = clean_up_dict(opt);

  // Report an open failure first. When the open fails the dictionary still
  // holds the options that were passed in, so checking leftovers first would
  // mask the real error behind a misleading "Unexpected options" message.
  if (ret < 0) {
    throw std::runtime_error(
        "Failed to open the input \"" + src + "\" (" + av_err2string(ret) +
        ").");
  }

  if (!unused_keys.empty()) {
    // The input was opened successfully; close it before bailing out,
    // otherwise the context would leak.
    avformat_close_input(&pFormat);
    throw std::runtime_error("Unexpected options: " + join(unused_keys));
  }
  return pFormat;
}
} // namespace
// Opens `src` (optionally forcing the input format named by `device`, with
// `option` forwarded as open options) and then reads the stream information.
//
// Throws std::runtime_error if opening fails (raised by get_format_context)
// or if avformat_find_stream_info reports an error.
AVFormatContextPtr::AVFormatContextPtr(
    const std::string& src,
    const std::string& device,
    const std::map<std::string, std::string>& option)
    : Wrapper<AVFormatContext, AVFormatContextDeleter>(
          get_format_context(src, device, option)) {
  const int status = avformat_find_stream_info(ptr.get(), nullptr);
  if (status < 0) {
    throw std::runtime_error("Failed to find stream information.");
  }
}
////////////////////////////////////////////////////////////////////////////////
// AVPacket
////////////////////////////////////////////////////////////////////////////////
void AVPacketDeleter::operator()(AVPacket* p) {
av_packet_free(&p);
};
namespace {
// Allocates an AVPacket with av_packet_alloc.
//
// Returns the new packet as a raw pointer owned by the caller.
// Throws std::runtime_error when the allocation returns null.
AVPacket* get_av_packet() {
  if (AVPacket* packet = av_packet_alloc()) {
    return packet;
  }
  throw std::runtime_error("Failed to allocate AVPacket object.");
}
} // namespace
// Constructs the wrapper around a newly allocated AVPacket.
// get_av_packet() throws std::runtime_error if the allocation fails, in which
// case construction does not complete.
AVPacketPtr::AVPacketPtr()
: Wrapper<AVPacket, AVPacketDeleter>(get_av_packet()) {}
////////////////////////////////////////////////////////////////////////////////
// AVPacket - buffer unref
////////////////////////////////////////////////////////////////////////////////
// Binds this guard to `p`. Only a reference is stored, so `p` must outlive the
// guard; the destructor unrefs the packet it refers to.
AutoPacketUnref::AutoPacketUnref(AVPacketPtr& p) : p_(p) {}
// Calls av_packet_unref on the referenced packet when the guard goes out of
// scope. The packet object itself is not freed here; it stays owned by the
// AVPacketPtr this guard refers to.
AutoPacketUnref::~AutoPacketUnref() {
// p_ is an AVPacketPtr&; presumably it converts implicitly to AVPacket*
// (conversion defined outside this file) — TODO confirm in the header.
av_packet_unref(p_);
}
// Lets the guard be passed directly where an AVPacket* is expected by
// converting the referenced AVPacketPtr. Ownership is not transferred.
AutoPacketUnref::operator AVPacket*() const {
return p_;
}
////////////////////////////////////////////////////////////////////////////////
// AVFrame
////////////////////////////////////////////////////////////////////////////////
void AVFrameDeleter::operator()(AVFrame* p) {
av_frame_free(&p);
};
namespace {
// Allocates an AVFrame with av_frame_alloc.
//
// Returns the new frame as a raw pointer owned by the caller.
// Throws std::runtime_error when the allocation returns null.
AVFrame* get_av_frame() {
  AVFrame* const frame = av_frame_alloc();
  if (frame == nullptr) {
    throw std::runtime_error("Failed to allocate AVFrame object.");
  }
  return frame;
}
} // namespace
// Constructs the wrapper around a newly allocated AVFrame.
// get_av_frame() throws std::runtime_error if the allocation fails, in which
// case construction does not complete.
AVFramePtr::AVFramePtr() : Wrapper<AVFrame, AVFrameDeleter>(get_av_frame()) {}
////////////////////////////////////////////////////////////////////////////////
// AVCodecContext
////////////////////////////////////////////////////////////////////////////////
void AVCodecContextDeleter::operator()(AVCodecContext* p) {
avcodec_free_context(&p);
};
namespace {
// Allocates a codec context for the decoder matching pParams->codec_id.
//
// The context is only allocated here; it is neither populated from `pParams`
// nor opened. Returns a raw pointer owned by the caller.
//
// Throws std::runtime_error when no decoder is registered for the codec id or
// when the context allocation returns null.
AVCodecContext* get_codec_context(AVCodecParameters* pParams) {
  const AVCodec* decoder = avcodec_find_decoder(pParams->codec_id);
  if (decoder == nullptr) {
    throw std::runtime_error("Unknown codec.");
  }

  AVCodecContext* context = avcodec_alloc_context3(decoder);
  if (context == nullptr) {
    throw std::runtime_error("Failed to allocate CodecContext.");
  }
  return context;
}
// Populates `pCodecContext` from `pParams` and opens it with the decoder that
// matches pParams->codec_id.
//
// Side effect on the input: for audio parameters whose channel_layout is zero,
// the default layout for pCodecContext->channels is written back into
// `pParams` (not into the codec context).
//
// Throws std::runtime_error when copying the parameters or opening the codec
// fails. The result of the decoder lookup is not checked here.
void init_codec_context(
    AVCodecContext* pCodecContext,
    AVCodecParameters* pParams) {
  const AVCodec* decoder = avcodec_find_decoder(pParams->codec_id);

  const int copy_status =
      avcodec_parameters_to_context(pCodecContext, pParams);
  if (copy_status < 0) {
    throw std::runtime_error("Failed to set CodecContext parameter.");
  }

  const int open_status = avcodec_open2(pCodecContext, decoder, nullptr);
  if (open_status < 0) {
    throw std::runtime_error("Failed to initialize CodecContext.");
  }

  const bool is_audio = pParams->codec_type == AVMEDIA_TYPE_AUDIO;
  if (is_audio && !pParams->channel_layout) {
    // Layout missing: derive one from the channel count known after opening.
    pParams->channel_layout =
        av_get_default_channel_layout(pCodecContext->channels);
  }
}
} // namespace
// Two-step construction: first allocate a context for the decoder named by
// pParam->codec_id and store it in the wrapper, then configure and open it.
// Either step may throw std::runtime_error.
AVCodecContextPtr::AVCodecContextPtr(AVCodecParameters* pParam)
    : Wrapper<AVCodecContext, AVCodecContextDeleter>(
          get_codec_context(pParam)) {
  AVCodecContext* const context = ptr.get();
  init_codec_context(context, pParam);
}
////////////////////////////////////////////////////////////////////////////////
// AVFilterGraph
////////////////////////////////////////////////////////////////////////////////
void AVFilterGraphDeleter::operator()(AVFilterGraph* p) {
avfilter_graph_free(&p);
};
namespace {
// Allocates an empty AVFilterGraph with avfilter_graph_alloc.
//
// Returns the new graph as a raw pointer owned by the caller.
// Throws std::runtime_error when the allocation returns null. The message
// names the object that failed to allocate, matching the wording used for the
// AVPacket and AVFrame allocators in this file.
AVFilterGraph* get_filter_graph() {
  AVFilterGraph* graph = avfilter_graph_alloc();
  if (!graph) {
    throw std::runtime_error("Failed to allocate AVFilterGraph object.");
  }
  return graph;
}
} // namespace
// Constructs the wrapper around a newly allocated, empty AVFilterGraph.
// get_filter_graph() throws std::runtime_error if the allocation fails, in
// which case construction does not complete.
AVFilterGraphPtr::AVFilterGraphPtr()
: Wrapper<AVFilterGraph, AVFilterGraphDeleter>(get_filter_graph()) {}
void AVFilterGraphPtr::reset() {
ptr.reset(get_filter_graph());
}
} // namespace ffmpeg
} // namespace torchaudio