inline void ImageDetRecordIOParser::Init()

in src/io/iter_image_det_recordio.cc [224:352]


// Initialize the parser from key/value arguments.
//
// Steps, in order:
//   1. Parse `kwargs` into `param_` (unknown keys are allowed).
//   2. Settle the number of preprocessing threads and write it back to
//      `param_.preprocess_threads`.
//   3. Build one augmenter chain and one random engine per thread.
//   4. Optionally load the image-list label map.
//   5. Estimate the maximum label width (from the label map, or by scanning
//      the whole record file) and grow `param_.label_pad_width` if needed.
//   6. (Re)create `source_` over the record file, shuffled when
//      `param_.shuffle_chunk_size > 0`, and hint its chunk size.
//
// Fails via CHECK / LOG(FATAL) when `path_imgrec` is empty, when a record has
// no label, when a record's label count disagrees with a positive
// `label_width`, or when a positive `label_pad_width` is smaller than the
// estimated width. Without OpenCV the function only raises LOG(FATAL).
inline void ImageDetRecordIOParser<DType>::Init(
    const std::vector<std::pair<std::string, std::string> >& kwargs) {
#if MXNET_USE_OPENCV
  // initialize parameter
  // init image rec param
  param_.InitAllowUnknown(kwargs);

  // be conservative, set number of real cores - 1.
  // omp_get_num_procs() does not need a parallel region; calling it directly
  // avoids every thread of a team racing to write the same shared variable.
  const int maxthread = std::max(omp_get_num_procs() - 1, 1);
  // num_threads() requires a positive value, so never go below one thread.
  param_.preprocess_threads =
      std::max(1, std::min(maxthread, param_.preprocess_threads));

  // Ask the runtime how many threads it really grants for that request.
  // Only one thread stores the answer to keep the write race-free.
  int threadget = 1;
  #pragma omp parallel num_threads(param_.preprocess_threads)
  {
    #pragma omp single
    threadget = omp_get_num_threads();
  }
  param_.preprocess_threads = threadget;

  const std::vector<std::string> aug_names = dmlc::Split(param_.aug_seq, ',');
  // Reset per-thread state so a repeated Init() does not accumulate entries.
  augmenters_.clear();
  augmenters_.resize(threadget);
  prnds_.clear();
  // setup decoders: one augmenter chain and one random engine per thread
  for (int i = 0; i < threadget; ++i) {
    for (const auto& aug_name : aug_names) {
      augmenters_[i].emplace_back(ImageAugmenter::Create(aug_name));
      augmenters_[i].back()->Init(kwargs);
    }
    // Each thread gets a distinct seed derived from its index.
    prnds_.emplace_back(new common::RANDOM_ENGINE((i + 1) * kRandMagic));
  }
  if (param_.path_imglist.length() != 0) {
    label_map_.reset(new ImageDetLabelMap(param_.path_imglist.c_str(),
      param_.label_width, !param_.verbose));
  }
  CHECK(param_.path_imgrec.length() != 0)
      << "ImageDetRecordIOIterator: must specify image_rec";

  if (param_.verbose) {
    LOG(INFO) << "ImageDetRecordIOParser: " << param_.path_imgrec
              << ", use " << threadget << " threads for decoding..";
  }

  // estimate padding width for labels
  int max_label_width = 0;
  if (label_map_ != nullptr) {
    max_label_width = label_map_->MaxLabelWidth();
  } else {
    // No label map: iterate through the recordio file once. The split used
    // for this scan is only needed here; `source_` is recreated afterwards.
    source_.reset(dmlc::InputSplit::Create(
        param_.path_imgrec.c_str(), param_.part_index,
        param_.num_parts, "recordio"));
    dmlc::InputSplit::Blob chunk;
    while (source_->NextChunk(&chunk)) {
      #pragma omp parallel num_threads(param_.preprocess_threads)
      {
        CHECK(omp_get_num_threads() == param_.preprocess_threads);
        // Thread-local maximum; merged into the shared one at the end.
        int max_width = 0;
        const int tid = omp_get_thread_num();
        // Each thread reads its own part of the current chunk.
        dmlc::RecordIOChunkReader reader(chunk, tid, param_.preprocess_threads);
        ImageRecordIO rec;
        dmlc::InputSplit::Blob blob;
        while (reader.NextRecord(&blob)) {
          rec.Load(blob.dptr, blob.size);
          if (rec.label != nullptr) {
            if (param_.label_width > 0) {
              CHECK_EQ(param_.label_width, rec.num_label)
                << "rec file provide " << rec.num_label << "-dimensional label "
                   "but label_width is set to " << param_.label_width;
            }
            // update max value
            max_width = std::max(max_width, rec.num_label);
          } else {
            LOG(FATAL) << "Not enough label packed in img_list or rec file.";
          }
        }
        #pragma omp critical
        {
          max_label_width = std::max(max_label_width, max_width);
        }
      }
    }
  }
  if (max_label_width > param_.label_pad_width) {
    // A user-provided (positive) pad width that is too small is an error;
    // an unset one is replaced by the estimate.
    if (param_.label_pad_width > 0) {
      LOG(FATAL) << "ImageDetRecordIOParser: label_pad_width: "
        << param_.label_pad_width << " smaller than estimated width: "
        << max_label_width;
    }
    param_.label_pad_width = max_label_width;
  }
  if (param_.verbose) {
    LOG(INFO) << "ImageDetRecordIOParser: " << param_.path_imgrec
              << ", label padding width: " << param_.label_pad_width;
  }

  // Fresh split for actual reading (the scan above may have consumed one).
  source_.reset(dmlc::InputSplit::Create(
      param_.path_imgrec.c_str(), param_.part_index,
      param_.num_parts, "recordio"));

  if (param_.shuffle_chunk_size > 0) {
    if (param_.shuffle_chunk_size > 4096) {
      LOG(INFO) << "Chunk size: " << param_.shuffle_chunk_size
                 << " MB which is larger than 4096 MB, please set "
                    "smaller chunk size";
    }
    if (param_.shuffle_chunk_size < 4) {
      LOG(INFO) << "Chunk size: " << param_.shuffle_chunk_size
                 << " MB which is less than 4 MB, please set "
                    "larger chunk size";
    }
    // 1.1 ratio is for a bit more shuffle parts to avoid boundary issue
    unsigned num_shuffle_parts =
        std::ceil(source_->GetTotalSize() * 1.1 /
                  (param_.num_parts * (param_.shuffle_chunk_size << 20UL)));

    if (num_shuffle_parts > 1) {
      source_.reset(dmlc::InputSplitShuffle::Create(
          param_.path_imgrec.c_str(), param_.part_index,
          param_.num_parts, "recordio", num_shuffle_parts, param_.shuffle_chunk_seed));
    }
    // NOTE(review): `<< 17` yields 1/8 of the `<< 20` value used above;
    // presumably intentional for the unit HintChunkSize expects - confirm.
    source_->HintChunkSize(param_.shuffle_chunk_size << 17UL);
  } else {
    // NOTE(review): the original comment here said "use 64 MB chunk when
    // possible", but 8 << 20 is 8 Mi - confirm the unit HintChunkSize expects.
    source_->HintChunkSize(8 << 20UL);
  }
#else
  LOG(FATAL) << "ImageDetRec need opencv to process";
#endif
}