in caffe2_customized_ops/video/customized_video_io.cc [735:825]
bool DecodeClipFromMemoryBufferFlex(
const char* video_buffer,
const int size,
const int start_frm,
const int length,
int & height,
int & width,
const int sampling_rate,
float*& buffer,
std::mt19937* randgen) {
Params params;
std::vector<std::unique_ptr<DecodedFrame>> sampledFrames;
CustomVideoDecoder decoder;
params.outputHeight_ = -1;
params.outputWidth_ = -1;
params.maximumOutputFrames_ = MAX_DECODING_FRAMES;
// ----------- usable with selective decoding
// bool isTemporalJitter = (start_frm < 0);
// decoder.decodeMemory(
// video_buffer,
// size,
// params,
// sampledFrames,
// length * sampling_rate,
// !isTemporalJitter);
//
// if (sampledFrames.size() < length * sampling_rate) {
// /* selective decoding failed. Decode all frames. */
// decoder.decodeMemory(video_buffer, size, params, sampledFrames);
// }
// ----------- usable with selective decoding
decoder.decodeMemory(video_buffer, size, params, sampledFrames);
buffer = nullptr;
int offset = 0;
int channel_size = 0;
int image_size = 0;
int data_size = 0;
int use_start_frm = start_frm;
if (start_frm < 0) { // perform temporal jittering
if ((int)(sampledFrames.size() - length * sampling_rate) > 0) {
use_start_frm = std::uniform_int_distribution<>(
0, (int)(sampledFrames.size() - length * sampling_rate))(*randgen);
} else { use_start_frm = 0; }
}
if (sampledFrames.size() == 0) {
LOG(ERROR) << "This video is empty.";
buffer = nullptr;
return true;
}
height = (int)sampledFrames[0]->height_ ;
width = (int)sampledFrames[0]->width_;
for (int idx = 0; idx < length; idx ++){
int i = use_start_frm + idx * sampling_rate;
// TODO{km}: consider cylindric sampling
i = i % (int)(sampledFrames.size());
if (idx == 0) {
image_size = sampledFrames[i]->height_ * sampledFrames[i]->width_;
channel_size = image_size * length;
data_size = channel_size * 3;
buffer = new float[data_size];
}
for (int c = 0; c < 3; c++) {
ImageDataToBuffer(
(unsigned char*)sampledFrames[i]->data_.get(),
sampledFrames[i]->height_,
sampledFrames[i]->width_,
buffer + c * channel_size + offset,
c);
}
offset += image_size;
}
CAFFE_ENFORCE(offset == channel_size, "Wrong offset size");
// free the sampledFrames
for (int i = 0; i < sampledFrames.size(); i++) {
DecodedFrame* p = sampledFrames[i].release();
delete p;
}
sampledFrames.clear();
return true;
}