in ops/av_decoder.cc [139:735]
void AVDecoder::decodeLoop(
const string& videoName,
VideoIOContext& ioctx,
const Params& params,
const int start_frm,
Callback& callback,
int& number_of_frames) {
AVPixelFormat pixFormat = params.pixelFormat_;
AVFormatContext* inputContext = avformat_alloc_context();
AVStream* videoStream_ = nullptr;
AVStream* audioStream_ = nullptr;
AVCodecContext* videoCodecContext_ = nullptr;
AVCodecContext* audioCodecContext_ = nullptr;
AVFrame* videoStreamFrame_ = nullptr;
AVFrame* audioStreamFrame_ = nullptr;
SwrContext* convertCtx_ = nullptr;
AVPacket packet;
  av_init_packet(&packet); // set packet fields to defaults; data/size are filled by av_read_frame
SwsContext* scaleContext_ = nullptr;
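  // Note: all FFmpeg objects above are raw pointers; they are released at the
  // end of the success path and again in the catch block at the bottom of
  // this function.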
try {
inputContext->pb = ioctx.get_avio();
inputContext->flags |= AVFMT_FLAG_CUSTOM_IO;
int ret = 0;
    // Read the beginning of the stream to probe the input format:
int probeSz = 1 * 1024 + AVPROBE_PADDING_SIZE;
DecodedFrame::AvDataPtr probe((uint8_t*)av_malloc(probeSz));
memset(probe.get(), 0, probeSz);
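    // av_probe_input_format() requires AVPROBE_PADDING_SIZE extra bytes,
    // zero-filled, beyond the data actually read; hence the over-allocation
    // and memset above.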
int len = ioctx.read(probe.get(), probeSz - AVPROBE_PADDING_SIZE);
if (len < probeSz - AVPROBE_PADDING_SIZE) {
LOG(ERROR) << "Insufficient data to determine video format";
}
// seek back to start of stream
ioctx.seek(0, SEEK_SET);
unique_ptr<AVProbeData> probeData(new AVProbeData());
probeData->buf = probe.get();
probeData->buf_size = len;
probeData->filename = "";
    // Probe the input format from the buffered data. The null check below
    // avoids a later double-free during cleanup.
    inputContext->iformat = av_probe_input_format(probeData.get(), 1);
if (inputContext->iformat == nullptr) {
LOG(ERROR) << "inputContext iformat is nullptr!";
}
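    // With AVFMT_FLAG_CUSTOM_IO and inputContext->pb already set,
    // avformat_open_input() reads from ioctx instead of a file, so the
    // filename argument below is left empty.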
ret = avformat_open_input(&inputContext, "", nullptr, nullptr);
if (ret < 0) {
LOG(ERROR) << "Unable to open stream : " << ffmpegErrorStr(ret);
return;
}
ret = avformat_find_stream_info(inputContext, nullptr);
if (ret < 0) {
LOG(ERROR) << "Unable to find stream info in " << videoName << " "
<< ffmpegErrorStr(ret);
return;
}
    // Select the streams to decode: honor an explicit stream index, or pick
    // the first video and first audio streams otherwise.
int videoStreamIndex_ = params.streamIndex_;
int audioStreamIndex_ = params.streamIndex_;
if (params.streamIndex_ == -1) {
      for (int i = 0; i < static_cast<int>(inputContext->nb_streams); i++) {
auto stream = inputContext->streams[i];
if (stream->codec->codec_type == AVMEDIA_TYPE_VIDEO &&
videoStreamIndex_ == -1) {
videoStreamIndex_ = i;
videoStream_ = stream;
} else if (
stream->codec->codec_type == AVMEDIA_TYPE_AUDIO &&
audioStreamIndex_ == -1) {
audioStreamIndex_ = i;
audioStream_ = stream;
}
if (videoStreamIndex_ != -1 && audioStreamIndex_ != -1) {
break;
}
}
}
    // Either the video or the audio stream may be missing, but not both
const bool hasVideo = params.getVideo_ && videoStreamIndex_ >= 0;
const bool hasAudio = params.getAudio_ && audioStreamIndex_ >= 0;
if (!hasAudio && !hasVideo) {
LOG(ERROR) << "Neither video nor audio stream are being decoded in "
<< videoName << " : " << ffmpegErrorStr(ret)
<< " params.getVideo_=" << params.getVideo_
<< " params.getAudio_=" << params.getAudio_
<< " videoStreamIndex=" << videoStreamIndex_
<< " audioStreamIndex=" << audioStreamIndex_;
return;
}
// Initialize codec
AVDictionary* opts = nullptr;
if (params.getAudio_ && audioStreamIndex_ >= 0) {
audioStreamFrame_ = av_frame_alloc();
audioCodecContext_ = inputContext->streams[audioStreamIndex_]->codec;
ret = avcodec_open2(
audioCodecContext_,
avcodec_find_decoder(audioCodecContext_->codec_id),
nullptr);
if (ret < 0) {
const std::string codecName =
audioCodecContext_->codec != nullptr
&& audioCodecContext_->codec->name != nullptr ?
std::string(audioCodecContext_->codec->name) : "None";
LOG(ERROR) << "Cannot open audio codec : " << codecName;
}
convertCtx_ = swr_alloc_set_opts(
nullptr,
params.outlayout_,
params.outfmt_,
params.outrate_,
audioCodecContext_->channel_layout,
audioCodecContext_->sample_fmt,
audioCodecContext_->sample_rate,
0,
nullptr);
if (convertCtx_ == nullptr) {
LOG(ERROR) << ("Cannot setup sample format converter.");
}
if (swr_init(convertCtx_) < 0) {
LOG(ERROR) << ("Cannot init sample format converter.");
}
}
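    // At this point the audio path is fully configured: frames decoded with
    // audioCodecContext_ are resampled by convertCtx_ from the stream's
    // native layout/format/rate into params.outlayout_/outfmt_/outrate_
    // (whatever output format Params was configured with).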
bool mustDecodeAll = false;
auto itvlIter = params.intervals_.begin();
double currFps = 0;
// frame index in video stream
int frameIndex = -1;
    // index of frames actually output after sampling
int outputFrameIndex = -1;
double lastFrameTimestamp = -1.0;
double timestamp = -1.0;
long int start_ts = -1;
double prevTimestamp = 0;
int outWidth = 0;
int outHeight = 0;
if (params.getVideo_ && videoStreamIndex_ >= 0) {
videoCodecContext_ = videoStream_->codec;
try {
ret = avcodec_open2(
videoCodecContext_,
avcodec_find_decoder(videoCodecContext_->codec_id),
&opts);
} catch (const std::exception&) {
LOG(ERROR) << ("Exception during open video codec");
}
if (ret < 0) {
LOG(ERROR) << "Cannot open video codec : "
<< videoCodecContext_->codec->name;
return;
}
// Calculate if we need to rescale the frames
const int origWidth = videoCodecContext_->width;
const int origHeight = videoCodecContext_->height;
outWidth = origWidth;
outHeight = origHeight;
if (params.video_res_type_ == VideoResType::ORIGINAL_RES) {
// if the original resolution is too low,
// make it at least the same size as crop_size_
if (params.crop_size_ > origWidth || params.crop_size_ > origHeight) {
ResizeAndKeepAspectRatio(
origWidth,
origHeight,
params.crop_size_,
-1,
outWidth,
outHeight);
}
} else if (params.video_res_type_ == VideoResType::USE_SHORT_EDGE) {
        // resize the image to the predefined
        // short_edge_ resolution while keeping the aspect ratio
ResizeAndKeepAspectRatio(
origWidth, origHeight, params.short_edge_, -1, outWidth, outHeight);
} else if (params.video_res_type_ == VideoResType::USE_WIDTH_HEIGHT) {
        // resize the image to the predefined
        // resolution, ignoring the aspect ratio
outWidth = params.outputWidth_;
outHeight = params.outputHeight_;
} else {
LOG(ERROR) << "Unknown VideoResType: " << params.video_res_type_;
}
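      // Illustrative numbers: a 1280x720 input with USE_SHORT_EDGE and
      // short_edge_ = 256 becomes roughly 455x256 (aspect ratio kept),
      // while USE_WIDTH_HEIGHT with 224x224 becomes exactly 224x224
      // (aspect ratio ignored).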
// Make sure that we have a valid format
if (videoCodecContext_->pix_fmt == AV_PIX_FMT_NONE) {
LOG(ERROR) << ("pixel format is not valid.");
}
// Create a scale context
scaleContext_ = sws_getContext(
videoCodecContext_->width,
videoCodecContext_->height,
videoCodecContext_->pix_fmt,
outWidth,
outHeight,
pixFormat,
SWS_FAST_BILINEAR,
nullptr,
nullptr,
nullptr);
// Getting video meta data
VideoMeta videoMeta;
videoMeta.codec_type = videoCodecContext_->codec_type;
videoMeta.width = outWidth;
videoMeta.height = outHeight;
videoMeta.pixFormat = pixFormat;
      // avoid division by zero; adapted from
      // https://www.ffmpeg.org/doxygen/0.6/rational_8h-source.html
      if (videoStream_->avg_frame_rate.num == 0 ||
          videoStream_->avg_frame_rate.den == 0) {
LOG(ERROR) << ("Frame rate is wrong. No data found.");
}
videoMeta.fps = av_q2d(videoStream_->avg_frame_rate);
callback.videoDecodingStarted(videoMeta);
number_of_frames = videoStream_->nb_frames;
if (params.intervals_.size() == 0) {
LOG(ERROR) << ("Empty sampling intervals.");
}
if (itvlIter->timestamp != 0) {
LOG(ERROR) << ("Sampling interval starting timestamp is not zero.");
}
currFps = itvlIter->fps;
if (currFps < 0 && currFps != SpecialFps::SAMPLE_ALL_FRAMES &&
currFps != SpecialFps::SAMPLE_TIMESTAMP_ONLY) {
// fps must be 0, -1, -2 or > 0
LOG(ERROR) << ("Invalid sampling fps.");
}
prevTimestamp = itvlIter->timestamp;
itvlIter++;
if (itvlIter != params.intervals_.end() &&
prevTimestamp >= itvlIter->timestamp) {
LOG(ERROR) << ("Sampling interval timestamps must be strictly ascending.");
}
// Initialize frame and packet.
// These will be reused across calls.
videoStreamFrame_ = av_frame_alloc();
}
std::mt19937 meta_randgen(time(nullptr));
    /* identify the point in the stream from which decoding must start */
AVStream* stream = hasVideo ? videoStream_ : audioStream_;
const int64_t duration = av_rescale_q(stream->duration,
stream->time_base, AV_TIME_BASE_Q);
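    // av_rescale_q converts from the stream's time_base to AV_TIME_BASE_Q
    // (microseconds); duration is only used for human-readable logging below.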
if ((videoStream_ != nullptr && videoStream_->duration > 0 &&
videoStream_->nb_frames > 0) || (audioStream_ != nullptr &&
audioStream_->duration > 0)) {
      /* we have a valid duration and nb_frames, so we can safely
       * pick an intermediate timestamp to start decoding from */
      // leave a margin of 10 frames to take into account the error
      // from av_seek_frame
long int margin = 0;
if (hasVideo) {
        margin = long(ceil(
            10 * double(videoStream_->duration) / videoStream_->nb_frames));
}
// if we need to do temporal jittering
if (params.decode_type_ == DecodeType::DO_TMP_JITTER) {
/* estimate the average duration for the required # of frames */
double maxFramesDuration = 0;
        if (hasVideo) {
          maxFramesDuration =
              double(videoStream_->duration) * params.num_of_required_frame_ /
              videoStream_->nb_frames;
} else {
maxFramesDuration = av_rescale_q(kAudioMargin, AV_TIME_BASE_Q,
audioStream_->time_base);
}
        int64_t ts1 = 0;
        int64_t ts2 = stream->duration - int64_t(ceil(maxFramesDuration));
        ts2 = ts2 > 0 ? ts2 : 0;
// pick a random timestamp between ts1 and ts2. ts2 is selected such
// that you have enough frames to satisfy the required # of frames.
        start_ts =
            std::uniform_int_distribution<int64_t>(ts1, ts2)(meta_randgen);
// seek a frame at start_ts
        const int64_t seekingTo =
            0 > (start_ts - margin) ? 0 : (start_ts - margin);
ret = av_seek_frame(
inputContext,
hasVideo ? videoStreamIndex_ : audioStreamIndex_,
seekingTo,
AVSEEK_FLAG_BACKWARD);
VLOG(2) << "Seeking to "
<< av_rescale_q(seekingTo, stream->time_base, AV_TIME_BASE_Q)
<< " / " << duration;
// if we need to decode from the start_frm
} else if (params.decode_type_ == DecodeType::USE_START_FRM) {
if (videoStream_ == nullptr) {
LOG(ERROR) << ("Nullptr found at videoStream_");
}
        start_ts = long(floor(
            double(videoStream_->duration) * start_frm /
            videoStream_->nb_frames));
// seek a frame at start_ts
ret = av_seek_frame(
inputContext,
hasVideo ? videoStreamIndex_ : audioStreamIndex_,
0 > (start_ts - margin) ? 0 : (start_ts - margin),
AVSEEK_FLAG_BACKWARD);
} else {
mustDecodeAll = true;
}
if (ret < 0) {
LOG(INFO) << "Unable to decode from a random start point";
/* fall back to default decoding of all frames from start */
av_seek_frame(inputContext,
hasVideo ? videoStreamIndex_ : audioStreamIndex_,
0,
AVSEEK_FLAG_BACKWARD);
mustDecodeAll = true;
}
} else {
mustDecodeAll = true;
}
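    // Illustrative numbers for the jittered case: for a video of 9000 frames
    // where 32 frames are required, maxFramesDuration covers the stream-time
    // span of ~32 frames, and start_ts is drawn uniformly from
    // [0, duration - maxFramesDuration] (all in stream time_base units), so
    // enough frames always remain after the seek.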
int gotPicture = 0;
int eof = 0;
int selectiveDecodedFrames = 0;
int maxFrames = (params.decode_type_ == DecodeType::DO_UNIFORM_SMP)
? MAX_DECODING_FRAMES
: params.num_of_required_frame_;
// There is a delay between reading packets from the
// transport and getting decoded frames back.
// Therefore, after EOF, continue going while
// the decoder is still giving us frames.
int ipacket = 0;
bool audioDecodeNeeded = hasAudio;
bool videoDecodeNeeded = hasVideo;
    while (audioDecodeNeeded || videoDecodeNeeded) {
audioDecodeNeeded = hasAudio && !eof;
videoDecodeNeeded = hasVideo &&
((!eof || gotPicture) &&
           /* either we must decode all frames or decode up to maxFrames,
            * based on the status of the mustDecodeAll flag */
(mustDecodeAll ||
((!mustDecodeAll) && (selectiveDecodedFrames < maxFrames))) &&
/* If on the last interval and not autodecoding keyframes and a
* SpecialFps indicates no more frames are needed, stop decoding */
!((itvlIter == params.intervals_.end() &&
(currFps == SpecialFps::SAMPLE_TIMESTAMP_ONLY ||
currFps == SpecialFps::SAMPLE_NO_FRAME)) &&
!params.keyFrames_));
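      // Net effect: the loop ends once audio has hit EOF and the video
      // decoder has drained its post-EOF delay (no more pictures) or met its
      // frame budget / sampling-interval limits.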
try {
if (!eof) {
ret = av_read_frame(inputContext, &packet);
if (ret == AVERROR_EOF) {
eof = 1;
av_free_packet(&packet);
packet.data = NULL;
packet.size = 0;
// stay in the while loop to flush frames
} else if (ret == AVERROR(EAGAIN)) {
av_free_packet(&packet);
continue;
} else if (ret < 0) {
LOG(ERROR) << "Error reading packet : " << ffmpegErrorStr(ret);
}
ipacket++;
auto si = packet.stream_index;
if (params.getAudio_ && audioStreamIndex_ >= 0 &&
si == audioStreamIndex_ && audioDecodeNeeded) {
// Audio packets can have multiple audio frames in a single packet
while (packet.size > 0) {
              if (audioCodecContext_ == nullptr ||
                  audioStreamFrame_ == nullptr ||
                  convertCtx_ == nullptr) {
                // 'continue' here would loop forever since packet.size
                // never shrinks; abandon the packet instead
                break;
              }
getAudioSample(
packet,
audioCodecContext_,
audioStreamFrame_,
convertCtx_,
callback,
params);
}
            if (audioStreamFrame_ == nullptr) {
              // free the packet before skipping it, to avoid a leak
              av_free_packet(&packet);
              continue;
            }
av_frame_unref(audioStreamFrame_);
}
if (si != videoStreamIndex_) {
av_free_packet(&packet);
continue;
}
}
if (params.getVideo_ && videoStreamIndex_ >= 0 && videoDecodeNeeded) {
          if (videoCodecContext_ == nullptr || videoStreamFrame_ == nullptr) {
            av_free_packet(&packet);
            continue;
          }
ret = avcodec_decode_video2(
videoCodecContext_, videoStreamFrame_, &gotPicture, &packet);
if (ret < 0) {
LOG(ERROR) << "Error decoding video frame : " << ffmpegErrorStr(ret);
}
try {
// Nothing to do without a picture
if (!gotPicture) {
av_free_packet(&packet);
continue;
}
frameIndex++;
            if (videoStreamFrame_ == nullptr) {
              av_free_packet(&packet);
              continue;
            }
long int frame_ts =
av_frame_get_best_effort_timestamp(videoStreamFrame_);
timestamp = frame_ts * av_q2d(videoStream_->time_base);
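            // the best-effort timestamp is in stream time_base units;
            // multiplying by av_q2d(time_base) converts it to seconds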
if ((frame_ts >= start_ts && !mustDecodeAll) || mustDecodeAll) {
/* process current frame if:
* 1) We are not doing selective decoding and mustDecodeAll
* OR
* 2) We are doing selective decoding and current frame
* timestamp is >= start_ts from where we start selective
* decoding*/
// if reaching the next interval, update the current fps
// and reset lastFrameTimestamp so the current frame could be
// sampled (unless fps == SpecialFps::SAMPLE_NO_FRAME)
if (itvlIter != params.intervals_.end() &&
timestamp >= itvlIter->timestamp) {
lastFrameTimestamp = -1.0;
currFps = itvlIter->fps;
prevTimestamp = itvlIter->timestamp;
itvlIter++;
if (itvlIter != params.intervals_.end() &&
prevTimestamp >= itvlIter->timestamp) {
LOG(ERROR) << (
"Sampling interval timestamps must be strictly "
"ascending.");
}
}
// keyFrame will bypass all checks on fps sampling settings
bool keyFrame = params.keyFrames_ && videoStreamFrame_->key_frame;
if (!keyFrame) {
                // if fps == SpecialFps::SAMPLE_NO_FRAME (0), don't sample at all
if (currFps == SpecialFps::SAMPLE_NO_FRAME) {
av_free_packet(&packet);
continue;
}
// fps is considered reached in the following cases:
// 1. lastFrameTimestamp < 0 - start of a new interval
// (or first frame)
// 2. currFps == SpecialFps::SAMPLE_ALL_FRAMES (-1) - sample
// every frame
// 3. timestamp - lastFrameTimestamp has reached target fps and
// currFps > 0 (not special fps setting)
// different modes for fps:
                // SpecialFps::SAMPLE_NO_FRAME (0):
// disable fps sampling, no frame sampled at all
// SpecialFps::SAMPLE_ALL_FRAMES (-1):
// unlimited fps sampling, will sample at native video fps
// SpecialFps::SAMPLE_TIMESTAMP_ONLY (-2):
// disable fps sampling, but will get the frame at specific
// timestamp
// others (> 0): decoding at the specified fps
bool fpsReached = lastFrameTimestamp < 0 ||
currFps == SpecialFps::SAMPLE_ALL_FRAMES ||
(currFps > 0 &&
timestamp >= lastFrameTimestamp + (1 / currFps));
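                // e.g. with currFps = 10, a frame is sampled only once
                // timestamp has advanced at least 1/10 = 0.1s past the
                // previously sampled frame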
if (!fpsReached) {
av_free_packet(&packet);
continue;
}
}
lastFrameTimestamp = timestamp;
outputFrameIndex++;
if (params.maximumOutputFrames_ != -1 &&
outputFrameIndex >= params.maximumOutputFrames_) {
// enough frames
av_free_packet(&packet);
break;
}
AVFrame* rgbFrame = av_frame_alloc();
if (!rgbFrame) {
LOG(ERROR) << ("Error allocating AVframe");
}
try {
// Determine required buffer size and allocate buffer
int numBytes =
avpicture_get_size(pixFormat, outWidth, outHeight);
DecodedFrame::AvDataPtr buffer(
(uint8_t*) av_malloc(numBytes * sizeof(uint8_t)));
int size = avpicture_fill(
(AVPicture*) rgbFrame,
buffer.get(),
pixFormat,
outWidth,
outHeight);
                  if (scaleContext_ == nullptr) {
                    // free the frame and packet before skipping; an early
                    // continue here would otherwise leak both
                    av_frame_free(&rgbFrame);
                    av_free_packet(&packet);
                    continue;
                  }
sws_scale(
scaleContext_,
videoStreamFrame_->data,
videoStreamFrame_->linesize,
0,
videoCodecContext_->height,
rgbFrame->data,
rgbFrame->linesize);
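              // sws_scale converts the decoded frame from the codec's native
              // pixel format and size to pixFormat at outWidth x outHeight,
              // writing into the buffer attached to rgbFrame above.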
unique_ptr<DecodedFrame> frame = make_unique<DecodedFrame>();
frame->width_ = outWidth;
frame->height_ = outHeight;
frame->data_ = move(buffer);
frame->size_ = size;
frame->index_ = frameIndex;
frame->outputFrameIndex_ = outputFrameIndex;
frame->timestamp_ = timestamp;
frame->keyFrame_ = videoStreamFrame_->key_frame;
callback.frameDecoded(std::move(frame));
selectiveDecodedFrames++;
av_frame_free(&rgbFrame);
} catch (const std::exception&) {
av_frame_free(&rgbFrame);
}
}
if (videoStreamFrame_ != nullptr) {
av_frame_unref(videoStreamFrame_);
}
} catch (const std::exception&) {
if (videoStreamFrame_ != nullptr) {
av_frame_unref(videoStreamFrame_);
}
}
}
av_free_packet(&packet);
} catch (const std::exception& exception) {
LOG(ERROR) << "Caught an exception" << exception.what();
av_free_packet(&packet);
}
} // of while loop
callback.videoDecodingEnded(timestamp);
    // free all allocated resources
if (scaleContext_ != nullptr) {
sws_freeContext(scaleContext_);
}
swr_free(&convertCtx_);
av_packet_unref(&packet);
av_frame_free(&videoStreamFrame_);
av_frame_free(&audioStreamFrame_);
if (videoCodecContext_ != nullptr) {
avcodec_close(videoCodecContext_);
}
if (audioCodecContext_ != nullptr) {
avcodec_close(audioCodecContext_);
}
avformat_close_input(&inputContext);
avformat_free_context(inputContext);
} catch (const std::exception& exception) {
LOG(ERROR) << "Caught an exception" << exception.what();
// In case of decoding error
// free all stuffs
sws_freeContext(scaleContext_);
swr_free(&convertCtx_);
av_packet_unref(&packet);
av_frame_free(&videoStreamFrame_);
av_frame_free(&audioStreamFrame_);
    if (videoCodecContext_ != nullptr) {
      avcodec_close(videoCodecContext_);
    }
    if (audioCodecContext_ != nullptr) {
      avcodec_close(audioCodecContext_);
    }
avformat_close_input(&inputContext);
avformat_free_context(inputContext);
}
}