in src/io/iter_image_recordio_2.cc [296:439]
inline void ImageRecordIOParser2<DType>::ParseChunk(dmlc::InputSplit::Blob * chunk) {
  // Decode, augment and (for non-uint8 DType) normalize every record in `chunk`.
  //
  // The chunk is split across `param_.preprocess_threads` OpenMP threads; thread
  // `tid` reads its share through a RecordIOChunkReader and appends the resulting
  // image/label instances to `temp_[tid]` (cleared first).
  //
  // \param chunk RecordIO blob holding the encoded records to parse.
  //
  // Fatal (CHECK / LOG(FATAL)) when: the OpenMP team size differs from
  // preprocess_threads, data_shape[0] is not 1, 3 or 4, an image cannot be
  // decoded, a 4-channel image is requested but not delivered, the pixel data is
  // not 8-bit, or the label width does not match the record.
  temp_.resize(param_.preprocess_threads);
#if MXNET_USE_OPENCV
  // save opencv out
  #pragma omp parallel num_threads(param_.preprocess_threads)
  {
    CHECK(omp_get_num_threads() == param_.preprocess_threads);
    const int tid = omp_get_thread_num();
    dmlc::RecordIOChunkReader reader(*chunk, tid, param_.preprocess_threads);
    ImageRecordIO rec;
    dmlc::InputSplit::Blob blob;
    // per-thread output buffer for decoded instances
    InstVector<DType> &out = temp_[tid];
    out.Clear();

    // Source-channel index for each destination channel.
    // OpenCV stores BGR (or BGRA) and we want RGB (or RGBA).
    const int gray_order[1] = {0};
    const int color_order[4] = {2, 1, 0, 3};
    // The uint8 reader copies raw pixels: no normalization and no mirroring.
    const bool normalize = !std::is_same<DType, uint8_t>::value;

    while (reader.NextRecord(&blob)) {
      // Opencv decode and augments
      rec.Load(blob.dptr, blob.size);
      cv::Mat buf(1, rec.content_size, CV_8U, rec.content);

      int decode_flag = 0;
      switch (param_.data_shape[0]) {
        case 1:
          decode_flag = 0;   // force single-channel grayscale
          break;
        case 3:
          decode_flag = 1;   // force 3-channel color
          break;
        case 4:
          // -1 to keep the number of channel of the encoded image,
          // and not force gray or color.
          decode_flag = -1;
          break;
        default:
          LOG(FATAL) << "Invalid output shape " << param_.data_shape;
      }
      cv::Mat res = cv::imdecode(buf, decode_flag);
      // imdecode yields an empty matrix when the buffer cannot be decoded;
      // stop here instead of pushing a 0x0 instance downstream.
      CHECK(!res.empty())
        << "Failed to decode image with index " << rec.image_index();
      if (param_.data_shape[0] == 4) {
        CHECK_EQ(res.channels(), 4)
          << "Invalid image with index " << rec.image_index()
          << ". Expected 4 channels, got " << res.channels();
      }

      // Channel count is taken from the decoded image, before augmentation.
      const int n_channels = res.channels();
      for (auto& aug : augmenters_[tid]) {
        res = aug->Process(res, nullptr, prnds_[tid].get());
      }
      // Pixels are read below as raw bytes; any other depth would be misread.
      CHECK_EQ(res.depth(), CV_8U)
        << "Invalid image with index " << rec.image_index()
        << ". Expected 8-bit pixel data";

      out.Push(static_cast<unsigned>(rec.image_index()),
               mshadow::Shape3(n_channels, res.rows, res.cols),
               mshadow::Shape1(param_.label_width));
      mshadow::Tensor<cpu, 3, DType> data = out.data().Back();

      const int* channel_order = (n_channels == 1) ? gray_order : color_order;

      // Random draws happen in a fixed order (mirror, contrast, illumination)
      // so that the per-thread generator sequence stays reproducible.
      std::uniform_real_distribution<float> rand_uniform(0, 1);
      std::bernoulli_distribution coin_flip(0.5);
      const bool is_mirrored =
          (normalize_param_.rand_mirror && coin_flip(*(prnds_[tid])))
          || normalize_param_.mirror;
      float contrast_scaled = 1.0f;
      float illumination_scaled = 0.0f;
      if (normalize) {
        contrast_scaled =
            (rand_uniform(*(prnds_[tid])) * normalize_param_.max_random_contrast * 2
             - normalize_param_.max_random_contrast + 1) * normalize_param_.scale;
        illumination_scaled =
            (rand_uniform(*(prnds_[tid])) * normalize_param_.max_random_illumination * 2
             - normalize_param_.max_random_illumination) * normalize_param_.scale;
      }

      // Pick the normalization mode once per image instead of once per pixel.
      // logic from iter_normalize.h, function SetOutImg
      const bool use_channel_mean =
          normalize_param_.mean_r > 0.0f || normalize_param_.mean_g > 0.0f ||
          normalize_param_.mean_b > 0.0f || normalize_param_.mean_a > 0.0f;
      const bool scale_only =
          !use_channel_mean &&
          (!meanfile_ready_ || normalize_param_.mean_img.length() == 0);
      if (normalize && !use_channel_mean && !scale_only) {
        // mean-image mode requires the mean image to have been loaded
        CHECK(meanfile_ready_);
      }

      for (int i = 0; i < res.rows; ++i) {
        const uchar* im_data = res.ptr<uchar>(i);
        for (int j = 0; j < res.cols; ++j, im_data += n_channels) {
          DType RGBA[4];
          for (int k = 0; k < n_channels; ++k) {
            RGBA[k] = im_data[channel_order[k]];
          }
          if (normalize) {
            // normalize here to avoid memory copies
            if (use_channel_mean) {
              // subtract mean per channel
              RGBA[0] -= normalize_param_.mean_r;
              if (n_channels >= 3) {
                RGBA[1] -= normalize_param_.mean_g;
                RGBA[2] -= normalize_param_.mean_b;
              }
              if (n_channels == 4) {
                RGBA[3] -= normalize_param_.mean_a;
              }
              for (int k = 0; k < n_channels; ++k) {
                RGBA[k] = RGBA[k] * contrast_scaled + illumination_scaled;
              }
            } else if (scale_only) {
              // do not subtract anything
              // NOTE(review): random contrast/illumination are not applied in
              // this branch, only the scale -- confirm this is intended.
              for (int k = 0; k < n_channels; ++k) {
                RGBA[k] = RGBA[k] * normalize_param_.scale;
              }
            } else {
              // subtract the mean image at the un-mirrored pixel position
              for (int k = 0; k < n_channels; ++k) {
                RGBA[k] = (RGBA[k] - meanimg_[k][i][j]) * contrast_scaled + illumination_scaled;
              }
            }
          }
          // mirror here to avoid memory copies; never mirrored in the Uint8 reader
          const int dst_col = (normalize && is_mirrored) ? (res.cols - j - 1) : j;
          for (int k = 0; k < n_channels; ++k) {
            data[k][i][dst_col] = RGBA[k];
          }
        }
      }

      // Label source priority: external label map, then the label array packed
      // in the record, then the scalar label in the record header.
      mshadow::Tensor<cpu, 1> label = out.label().Back();
      if (label_map_ != nullptr) {
        mshadow::Copy(label, label_map_->Find(rec.image_index()));
      } else if (rec.label != nullptr) {
        CHECK_EQ(param_.label_width, rec.num_label)
          << "rec file provide " << rec.num_label << "-dimensional label "
             "but label_width is set to " << param_.label_width;
        mshadow::Copy(label, mshadow::Tensor<cpu, 1>(rec.label,
                                                     mshadow::Shape1(rec.num_label)));
      } else {
        CHECK_EQ(param_.label_width, 1)
          << "label_width must be 1 unless an imglist is provided "
             "or the rec file is packed with multi dimensional label";
        label[0] = rec.header.label;
      }
      // `res` is released by its destructor at the end of this iteration.
    }
  }
#else
  LOG(FATAL) << "Opencv is needed for image decoding and augmenting.";
#endif
}