OperatorBase::template GetSingleArgument()

in ops/av_input_op.h [302:422]


          OperatorBase::template GetSingleArgument<int>("length_rgb", 0)),
      sampling_rate_rgb_(OperatorBase::template GetSingleArgument<int>(
          "sampling_rate_rgb",
          1)),
      length_of_(OperatorBase::template GetSingleArgument<int>("length_of", 0)),
      sampling_rate_of_(
          OperatorBase::template GetSingleArgument<int>("sampling_rate_of", 1)),
      frame_gap_of_(
          OperatorBase::template GetSingleArgument<int>("frame_gap_of", 1)),
      random_mirror_(OperatorBase::template GetSingleArgument<bool>(
          "random_mirror",
          true)),
      num_of_class_(
          OperatorBase::template GetSingleArgument<int>("num_of_class", 0)),
      use_local_file_(OperatorBase::template GetSingleArgument<bool>(
          "use_local_file",
          false)),
      random_crop_(
          OperatorBase::template GetSingleArgument<bool>("random_crop", true)),
      decode_type_(
          OperatorBase::template GetSingleArgument<int>("decode_type", 0)),
      video_res_type_(
          OperatorBase::template GetSingleArgument<int>("video_res_type", 0)),
      get_rgb_(OperatorBase::template GetSingleArgument<bool>(
          "get_rgb",
          false)),
      get_logmels_(OperatorBase::template GetSingleArgument<bool>("get_logmels",
          false)),
      get_video_id_(OperatorBase::template GetSingleArgument<bool>(
          "get_video_id",
          false)),
      do_multi_label_(OperatorBase::template GetSingleArgument<bool>(
          "do_multi_label",
          false)),
      logMelFrames_(OperatorBase::template GetSingleArgument<int>(
          "logmel_frames", kNumLogMelFrames)),
      logMelFilters_(OperatorBase::template GetSingleArgument<int>(
          "logmel_filters", kNumLogMelFilters)),
      logMelWindowSizeMs_(OperatorBase::template GetSingleArgument<int>(
          "logmel_winsize_ms", kWindowLength)),
      logMelWindowStepMs_(OperatorBase::template GetSingleArgument<int>(
          "logmel_winstep_ms", kWindowStep)),
      logMelAudioSamplingRate_(OperatorBase::template GetSingleArgument<int>(
          "logmel_audio_sr", kAudioSamplingRate)),
      align_audio_(OperatorBase::template GetSingleArgument<int>(
          "align_audio", 1)),
      audio_length_(OperatorBase::template GetSingleArgument<int>(
          "audio_length", 0)),
      tune_audio_step_(OperatorBase::template GetSingleArgument<bool>(
          "tune_audio_step",
          false)),
      num_decode_threads_(OperatorBase::template GetSingleArgument<int>(
          "num_decode_threads", 4)),
      thread_pool_(std::make_shared<TaskThreadPool>(num_decode_threads_)) {
  try {
    num_of_required_frame_ = 0;
    // Per-channel mean and standard deviation used to normalize the
    // different input data types. Note that these statistics were generated
    // from SOA, so you may want to change them if you are running on a
    // different dataset. The entries are, in order: horizontal component of
    // optical flow, vertical component of optical flow, magnitude of optical
    // flow, Gray, R, G, B. Only the R, G, B entries (indices 4-6) are used
    // below.
    const std::vector<float> InputDataMean = {0.0046635, 0.0046261,
        0.963986, 102.976, 110.201, 100.64, 95.9966};
    const std::vector<float> InputDataStd = {0.972347, 0.755146,
        1.43588, 55.3691, 58.1489, 56.4701, 55.3324};
    // if we need RGB as an input
    if (get_rgb_) {
      // how many frames we need for RGB
      num_of_required_frame_ = std::max(
          num_of_required_frame_, (length_rgb_ - 1) * sampling_rate_rgb_ + 1);

      channels_rgb_ = 3;
      for (int i = 4; i < 7; i++) {
        mean_rgb_.push_back(InputDataMean[i]);
        inv_std_rgb_.push_back(1.f / InputDataStd[i]);
      }
    }

    CheckParamsAndPrint();
    // Always need a dbreader, even when using local video files
    CAFFE_ENFORCE_GT(
        operator_def.input_size(), 0, "Need to have a DBReader blob input");

    vector<int64_t> data_shape(5);
    vector<int64_t> label_shape(2);
    vector<int64_t> logmels_shape(4);

    // for RGB data
    data_shape[0] = batch_size_ * clip_per_video_;
    data_shape[1] = channels_rgb_;
    data_shape[2] = length_rgb_;
    data_shape[3] = crop_size_;
    data_shape[4] = crop_size_;
    prefetched_clip_rgb_.Resize(data_shape);

    // for audio data
    logmels_shape[0] = batch_size_ * clip_per_video_;
    logmels_shape[1] = 1;
    logmels_shape[2] = logMelFrames_;
    logmels_shape[3] = logMelFilters_;
    prefetched_clip_logmels_.Resize(logmels_shape);

    // If do_multi_label is used, the output label is a binary vector
    // of length num_of_class indicating which labels are present
    if (do_multi_label_) {
      label_shape[0] = batch_size_ * clip_per_video_;
      label_shape[1] = num_of_class_;
      prefetched_label_.Resize(label_shape);
    } else {
      prefetched_label_.Resize(
          vector<int64_t>(1, batch_size_ * clip_per_video_));
    }

    prefetched_video_id_.Resize(
        vector<int64_t>(1, batch_size_ * clip_per_video_));
  } catch (const std::exception& exc) {
    std::cerr << "While calling AVInputOp initialization\n";
    std::cerr << exc.what();
  }
}