source/util/CvUtil.h

/** * Copyright 2004-present Facebook. All Rights Reserved. * * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. */ #pragma once #include <vector> #include <gflags/gflags.h> #include <glog/logging.h> #include <opencv2/calib3d.hpp> #include <opencv2/core.hpp> #include <opencv2/highgui.hpp> #include <opencv2/imgproc.hpp> #include <folly/Format.h> #include "source/util/FilesystemUtil.h" #include "source/util/MathUtil.h" #include "source/util/RawUtil.h" #include "source/util/SystemUtil.h" #include "source/util/ThreadPool.h" namespace fb360_dep { namespace cv_util { // wrapper for cv::imread which throws an exception if loading fails cv::Mat imreadExceptionOnFail(const filesystem::path& filename, const int flags = cv::IMREAD_COLOR); // wrapper for cv::imwrite which throws an exception if writing fails void imwriteExceptionOnFail( const filesystem::path& filename, const cv::Mat& image, const std::vector<int>& params = {}); // given a vector of images, stack them horizontally or vertically to form a larger image template <typename T> cv::Mat_<T> stackHorizontal(const std::vector<cv::Mat_<T>>& images) { assert(!images.empty()); if (images.size() == 1) { return images[0]; } cv::Mat_<T> stacked = images[0].clone(); for (int i = 1; i < int(images.size()); ++i) { cv::hconcat(stacked, images[i], stacked); } return stacked; } template <typename T> cv::Mat_<T> stackVertical(const std::vector<cv::Mat_<T>>& images) { assert(!images.empty()); if (images.size() == 1) { return images[0]; } cv::Mat_<T> stacked = images[0].clone(); for (int i = 1; i < int(images.size()); ++i) { vconcat(stacked, images[i], stacked); } return stacked; } // returns the first 3 elements of a 4-d vector static inline cv::Vec3f head3(const cv::Vec4f& v) { return cv::Vec3f(v[0], v[1], v[2]); } // for 1-channel float mats (e.g., depth maps), we use PFM format. // notes on the PFM format: http://www.pauldebevec.com/Research/HDR/PFM/ void writeCvMat32FC1ToPFM(const filesystem::path& path, const cv::Mat_<float>& m); cv::Mat_<float> readCvMat32FC1FromPFM(const filesystem::path& path); template <typename T> const T& clampToEdge(const cv::Mat_<T>& src, const int x, const int y) { return src(math_util::clamp(y, 0, src.rows - 1), math_util::clamp(x, 0, src.cols - 1)); } template <typename T> T bilerp(const T& p00, const T& p01, const T& p10, const T& p11, float xw, float yw) { return (1 - xw) * (1 - yw) * p00 + xw * (1 - yw) * p01 + (1 - xw) * yw * p10 + xw * yw * p11; } // partial specialization for opencv's Vec<T, N> type as all the rounding and clamping cause // severe performance and precision problems template <typename T, int N> cv::Vec<float, N> bilerp( const cv::Vec<T, N>& p00, const cv::Vec<T, N>& p01, const cv::Vec<T, N>& p10, const cv::Vec<T, N>& p11, float xw, float yw) { cv::Vec<float, N> result; for (int i = 0; i < N; ++i) { result[i] = bilerp(p00[i], p01[i], p10[i], p11[i], xw, yw); } return result; } // perform bilinear interpolation with clamp-to-edge semantics // NOTE: uses normal coordinate convention, i.e. integer pixel corners. NOT OPENCV's template <typename T, typename ResultType = T> ResultType getPixelBilinear(const cv::Mat_<T>& src, const float x, const float y) { const float xf = round(x); const float yf = round(y); const int xi = xf; const int yi = yf; return bilerp( clampToEdge(src, xi - 1, yi - 1), clampToEdge(src, xi, yi - 1), clampToEdge(src, xi - 1, yi), clampToEdge(src, xi, yi), x - xf + 0.5f, y - yf + 0.5f); } inline cv::Mat removeAlpha(const cv::Mat& src) { if (src.channels() < 4) { return src.clone(); } cv::Mat dst; cvtColor(src, dst, cv::COLOR_BGRA2BGR); return dst; } inline cv::Mat extractAlpha(const cv::Mat& src) { CHECK_EQ(src.channels(), 4) << "no alpha channel!"; cv::Mat alpha; cv::extractChannel(src, alpha, 3); return alpha; } template <typename T> inline cv::Mat_<T> resizeImage(const cv::Mat_<T>& image, const cv::Size& size, const int type = cv::INTER_AREA) { if (image.empty() || image.size() == size) { return image; } cv::Mat_<T> imageResized; cv::resize(image, imageResized, size, 0, 0, type); return imageResized; } template <typename T> inline cv::Mat_<T> scaleImage(const cv::Mat_<T>& image, const double scaleFactor, const int type = cv::INTER_AREA) { const cv::Size size(std::round(image.cols * scaleFactor), std::round(image.rows * scaleFactor)); return resizeImage(image, size, type); } template <typename T> inline std::vector<cv::Mat_<T>> resizeImages( const std::vector<cv::Mat_<T>>& imagesIn, const cv::Size& size, const int type = cv::INTER_AREA, const int numThreads = -1) { std::vector<cv::Mat_<T>> imagesOut(imagesIn.size()); ThreadPool threadPool(numThreads); for (int i = 0; i < int(imagesIn.size()); ++i) { threadPool.spawn([&, i] { imagesOut[i] = resizeImage<T>(imagesIn[i], size, type); }); } threadPool.join(); return imagesOut; } inline float maxPixelValueFromCvDepth(int cvDepth) { if (cvDepth == CV_8U) { return 255.0f; } else if (cvDepth == CV_16U) { return 65535.0f; } else if (cvDepth == CV_32F) { return 1.0f; } else { LOG(FATAL) << folly::sformat("Depth not supported: {}", cvDepth); return 0.0f; } } template <typename _Tp, int cn> inline static cv::Vec<_Tp, cn> absdiff(const cv::Vec<_Tp, cn>& a, const cv::Vec<_Tp, cn>& b) { cv::Vec<_Tp, cn> result; for (int i = 0; i < cn; ++i) { result[i] = abs(a[i] - b[i]); } } inline float maxPixelValue(const cv::Mat& mat) { return maxPixelValueFromCvDepth(mat.depth()); } inline cv::Mat convertTo(const cv::Mat& srcImage, const int cvDepth) { cv::Mat dstImage; if (srcImage.depth() == cvDepth) { srcImage.copyTo(dstImage); } else { srcImage.convertTo( dstImage, cvDepth, maxPixelValueFromCvDepth(cvDepth) / maxPixelValue(srcImage)); } CHECK_EQ(dstImage.depth(), cvDepth) << " was expecting depth: " << cvDepth << ", dstImage.depth() is actually: " << dstImage.depth(); return dstImage; } template <typename T> inline cv::Mat convertTo(const cv::Mat& srcImage); template <> inline cv::Mat convertTo<uint8_t>(const cv::Mat& srcImage) { return convertTo(srcImage, CV_8U); } template <> inline cv::Mat convertTo<uint16_t>(const cv::Mat& srcImage) { return convertTo(srcImage, CV_16U); } template <> inline cv::Mat convertTo<float>(const cv::Mat& srcImage) { return convertTo(srcImage, CV_32F); } template <typename T> inline cv::Mat_<T> convertImage(const cv::Mat& imageIn) { // Create a dummy mat that will hold type, depth and channels for a given T cv::Mat_<T> infoMat; // Convert to desired depth cv::Mat imageOut = convertTo(imageIn, infoMat.depth()); // Special case: OpenCV doesn't have a bool type, so Mat_<bool> is CV_8U [0..255] // We need to force values to be in [0..1] if (std::is_same<T, bool>::value) { cv::threshold(imageOut, imageOut, 127, 1, cv::THRESH_BINARY); } // Convert to desired number of channels const int chI = imageIn.channels(); const int chO = infoMat.channels(); if (chI == chO) { return imageOut; } if (chI == 1 && chO == 3) { cv::cvtColor(imageOut, imageOut, cv::COLOR_GRAY2BGR); } else if (chI == 1 && chO == 4) { cv::cvtColor(imageOut, imageOut, cv::COLOR_GRAY2BGRA); } else if (chI == 3 && chO == 1) { cv::cvtColor(imageOut, imageOut, cv::COLOR_BGR2GRAY); } else if (chI == 3 && chO == 4) { cv::cvtColor(imageOut, imageOut, cv::COLOR_BGR2BGRA); } else if (chI == 4 && chO == 1) { cv::cvtColor(imageOut, imageOut, cv::COLOR_BGRA2GRAY); } else if (chI == 4 && chO == 3) { cv::cvtColor(imageOut, imageOut, cv::COLOR_BGRA2BGR); } else { CHECK(false) << "Conversion from " << chI << " channels to " << chO << " channels not supported"; } return imageOut; } inline cv::Mat loadImageUnchanged(const filesystem::path& filename) { cv::Mat image; if (filename.extension() == ".raw") { image = rawToRgb(filename); } else if (filename.extension() == ".pfm") { image = cv_util::readCvMat32FC1FromPFM(filename); } else { image = imreadExceptionOnFail(filename, cv::IMREAD_UNCHANGED); } return image; } template <typename T> inline cv::Mat_<T> loadImage(const filesystem::path& filename) { cv::Mat image = loadImageUnchanged(filename); return convertImage<T>(image); } template <typename T> inline cv::Mat_<T> loadScaledImage( const filesystem::path& filename, const double scaleFactor, const int type = cv::INTER_LINEAR) { return scaleImage(loadImage<T>(filename), scaleFactor, type); } template <typename T> inline cv::Mat_<T> loadResizedImage( const filesystem::path& filename, const cv::Size& size, const int type = cv::INTER_LINEAR) { return resizeImage(loadImage<T>(filename), size, type); } template <typename T> inline cv::Mat_<T> gaussianBlur(const cv::Mat_<T>& mat, const int blurRadius, const float sigma = 0.0f) { if (blurRadius < 1) { return mat; } cv::Mat_<T> matBlur; const int w = 2 * blurRadius + 1; cv::GaussianBlur(mat, matBlur, cv::Size(w, w), sigma, 0); return matBlur; } template <typename T> inline cv::Mat_<T> blur(const cv::Mat_<T>& mat, const int blurRadius = 1) { if (blurRadius < 1) { return mat; } cv::Mat_<T> matBlur; const int w = 2 * blurRadius + 1; cv::blur(mat, matBlur, cv::Size(w, w)); return matBlur; } inline cv::Mat_<bool> dilate(const cv::Mat_<bool>& mat, const int dilateRadius = 1) { if (dilateRadius < 1) { return mat; } cv::Mat_<bool> matDilated; const int w = 2 * dilateRadius + 1; const cv::Mat element = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(w, w)); cv::dilate(mat, matDilated, element); return matDilated; } inline cv::Mat_<float> maskedMedianBlur( const cv::Mat_<float>& mat, const cv::Mat_<float>& background, const cv::Mat_<bool>& mask, const int radius, const bool ignoreNan = true) { cv::Mat_<float> blurred(mat.size(), 0.0); for (int y = 0; y < mat.rows; ++y) { for (int x = 0; x < mat.cols; ++x) { std::vector<float> values; if (!mask(y, x)) { if (!background.empty()) { blurred(y, x) = background(y, x); } continue; } for (int yy = y - radius; yy <= y + radius; ++yy) { for (int xx = x - radius; xx <= x + radius; ++xx) { // Ignore out of bounds if (0 > yy || yy >= mat.rows || 0 > xx || xx >= mat.cols) { continue; } // Ignore outside mask if (!mask(yy, xx)) { continue; } // Ignore NAN values if specified to do so if (ignoreNan && (std::isnan(mat(yy, xx)) || mat(yy, xx) == 0)) { continue; } values.push_back(mat(yy, xx)); } } if (values.size() != 0) { const size_t n = values.size() / 2; std::partial_sort(values.begin(), values.begin() + n + 1, values.end()); if (values.size() % 2 == 1) { blurred(y, x) = values[n]; } else { blurred(y, x) = (values[n - 1] + values[n]) / 2.0; } } } } return blurred; } // Convert a BGR(A) cv::Mat to a RGBA vector<uint8_t> inline std::vector<uint8_t> getRGBA8Vector(const cv::Mat& src) { const int numChannels = src.channels(); CHECK(numChannels == 3 || numChannels == 4) << "unexpected " << numChannels; const bool hasAlpha = numChannels == 4; cv::Mat imageMat = convertTo<uint8_t>(src.clone()); // Add an alpha-channel if necessary (the textures expect one), and // re-order BGRA->RGBA cvtColor(imageMat, imageMat, hasAlpha ? cv::COLOR_BGRA2RGBA : cv::COLOR_BGR2RGBA); static const int kChannels = 4; const int imageDataNumBytes = imageMat.rows * imageMat.cols * kChannels; std::vector<uint8_t> imageBytes(imageDataNumBytes); for (int i = 0; i < imageDataNumBytes; ++i) { imageBytes[i] = imageMat.ptr<uint8_t>()[i]; } return imageBytes; } template <typename T> inline T createBGR(const float b, const float g, const float r); template <> inline cv::Vec3f createBGR(const float b, const float g, const float r) { return cv::Vec3f(b, g, r) * maxPixelValueFromCvDepth(CV_32F); } // Values < 1 will be truncated to 0 when creating unsigned short. Need to scale it beforehand template <> inline cv::Vec3w createBGR(const float b, const float g, const float r) { const float maxVal = maxPixelValueFromCvDepth(CV_16U); return cv::Vec3w(b * maxVal, g * maxVal, r * maxVal); } template <typename T> inline T createBGRA(const float b, const float g, const float r, const float a); template <> inline cv::Vec4f createBGRA(const float b, const float g, const float r, const float a) { return cv::Vec4f(b, g, r, a) * maxPixelValueFromCvDepth(CV_32F); } // Values < 1 will be truncated to 0 when creating unsigned short. Need to scale it beforehand template <> inline cv::Vec4w createBGRA(const float b, const float g, const float r, const float a) { const float maxVal = maxPixelValueFromCvDepth(CV_16U); return cv::Vec4w(b * maxVal, g * maxVal, r * maxVal, a * maxVal); } inline std::vector<cv::Mat_<bool>> generateAllPassMasks(const cv::Size& size, const int numMasks) { const cv::Mat_<bool> allPass(size, true); const std::vector<cv::Mat_<bool>> masks(numMasks, allPass); return masks; } } // namespace cv_util } // namespace fb360_dep

source/util/CvUtil.h (355 lines of code) (raw):