bool DecodeClipFromMemoryBuffer()

in caffe2_customized_ops/video/customized_video_io.cc [407:494]


bool DecodeClipFromMemoryBuffer(
    const char* video_buffer,
    const int size,
    const int start_frm,
    const int length,
    const int height,
    const int width,
    const int sampling_rate,
    float*& buffer,
    std::mt19937* randgen) {
  Params params;
  std::vector<std::unique_ptr<DecodedFrame>> sampledFrames;
  CustomVideoDecoder decoder;

  params.outputHeight_ = height ? height : -1;
  params.outputWidth_ = width ? width : -1;
  params.maximumOutputFrames_ = MAX_DECODING_FRAMES;

  bool isTemporalJitter = (start_frm < 0);

  // decoder.decodeMemory(
  //     video_buffer,
  //     size,
  //     params,
  //     sampledFrames,
  //     length * sampling_rate,
  //     !isTemporalJitter);
  //
  // if (sampledFrames.size() < length * sampling_rate) {
  //   /* selective decoding failed. Decode all frames. */
  //   decoder.decodeMemory(video_buffer, size, params, sampledFrames);
  // }
  decoder.decodeMemory(video_buffer, size, params, sampledFrames);

  buffer = nullptr;
  int offset = 0;
  int channel_size = 0;
  int image_size = 0;
  int data_size = 0;

  int use_start_frm = start_frm;
  if (start_frm < 0) { // perform temporal jittering
    if ((int)(sampledFrames.size() - length * sampling_rate) > 0) {
      use_start_frm = std::uniform_int_distribution<>(
          0, (int)(sampledFrames.size() - length * sampling_rate))(*randgen);
    } else {
      use_start_frm = 0;
    }
  }

  if (sampledFrames.size() == 0) {
    LOG(ERROR) << "This video is empty.";
    buffer = nullptr;
    return true;
  }

  for (int idx = 0; idx < length; idx ++){
    int i = use_start_frm + idx * sampling_rate;
    // TODO{km}: consider cylindric sampling
    i = i % (int)(sampledFrames.size());  // periodic sampling
    if (idx == 0) {
      image_size = sampledFrames[i]->height_ * sampledFrames[i]->width_;
      channel_size = image_size * length;
      data_size = channel_size * 3;
      buffer = new float[data_size];
    }

    for (int c = 0; c < 3; c++) {
      ImageDataToBuffer(
          (unsigned char*)sampledFrames[i]->data_.get(),
          sampledFrames[i]->height_,
          sampledFrames[i]->width_,
          buffer + c * channel_size + offset,
          c);
    }
    offset += image_size;
  }
  CAFFE_ENFORCE(offset == channel_size, "Wrong offset size");

  // free the sampledFrames
  for (int i = 0; i < sampledFrames.size(); i++) {
    DecodedFrame* p = sampledFrames[i].release();
    delete p;
  }
  sampledFrames.clear();

  return true;
}