source/depth_estimation/PyramidLevel.h

/** * Copyright 2004-present Facebook. All Rights Reserved. * * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. */ #pragma once #include <gflags/gflags.h> #include <glog/logging.h> #include "source/util/CvUtil.h" #include "source/util/FilesystemUtil.h" #include "source/util/ImageTypes.h" #include "source/util/ImageUtil.h" #include "source/util/SystemUtil.h" #include "source/util/ThreadPool.h" namespace fb360_dep { namespace depth_estimation { template <typename PixelType> struct PyramidLevel { struct Src { cv::Mat_<PixelType> color; cv::Mat_<float> variance; cv::Mat_<bool> foregroundMask; cv::Mat_<bool> foregroundMaskDilated; }; struct Dst { cv::Mat_<PixelType> color; cv::Mat_<float> disparity; cv::Mat_<bool> mismatchedDisparityMask; cv::Mat_<float> cost; cv::Mat_<float> confidence; cv::Mat_<int> overlap; cv::Mat_<bool> fovMask; cv::Mat_<bool> foregroundMask; cv::Mat_<float> backgroundDisparity; }; struct Proj { cv::Mat_<cv::Vec2f> projWarp; cv::Mat_<cv::Vec2f> projWarpInv; cv::Mat_<PixelType> projColor; cv::Mat_<PixelType> projColorBias; }; // if first frame is 000039, frameIdx = 0, frameName = 000039 int frameIdx; std::string frameName; int numFrames; int level; int numLevels; cv::Size sizeLevel; std::map<int, cv::Size> levelSizes; Camera::Rig rigSrc; Camera::Rig rigDst; std::vector<int> dst2srcIdxs; public: int findDstIdx(const std::string& dstId) const { for (int i = 0; i < int(rigDst.size()); ++i) { if (rigDst[i].id == dstId) { return i; } } CHECK(false) << "Cannot find dst idx for ID: " << dstId; } std::vector<Src> srcs; std::vector<Dst> dsts; std::vector<Proj> projs; filesystem::path srcColorsPath; // in case we want to load full-size images int widthFullSize; int heightFullSize; float varNoiseFullSize; float varNoiseFloor; float varHighThresh; bool hasForegroundMasks; filesystem::path outputDir; int numThreads; PyramidLevel( const int frameIdxIn, const std::string& frameNameIn, const int numFramesIn, const int levelIn, const int numLevelsIn, const std::map<int, cv::Size>& levelSizesIn, const Camera::Rig& rigSrcIn, const Camera::Rig& rigDstIn, const std::vector<int>& dst2srcIdxsIn, const std::vector<cv::Mat_<PixelType>>& srcColorsIn, const std::vector<cv::Mat_<bool>>& srcForegroundMasksIn, const std::vector<cv::Mat_<bool>>& dstFovMasksIn, const std::vector<cv::Mat_<float>>& dstBackgroundDisparitiesIn, const int widthFullSizeIn, const int heightFullSizeIn, const std::string& color, const float varNoiseFloor, const float varHighThresh, const bool useForegroundMasks, const std::string& outputRoot, const int threads) : frameIdx(frameIdxIn), frameName(frameNameIn), numFrames(numFramesIn), level(levelIn), numLevels(numLevelsIn), levelSizes(levelSizesIn), rigSrc(rigSrcIn), rigDst(rigDstIn), dst2srcIdxs(dst2srcIdxsIn), srcColorsPath(color), widthFullSize(widthFullSizeIn), heightFullSize(heightFullSizeIn), varNoiseFullSize(varNoiseFloor), varHighThresh(varHighThresh), hasForegroundMasks(useForegroundMasks), outputDir(outputRoot), numThreads(threads) { sizeLevel = levelSizes[level]; checkParams(); const int numSrcs = rigSrc.size(); const int numDsts = rigDst.size(); CHECK_EQ(srcColorsIn.size(), numSrcs); CHECK_EQ(srcForegroundMasksIn.size(), numSrcs); CHECK_EQ(dstFovMasksIn.size(), numDsts); CHECK_EQ(dstBackgroundDisparitiesIn.size(), numDsts); for (int srcIdx = 0; srcIdx < numSrcs; ++srcIdx) { Src src; src.color = srcColorsIn[srcIdx]; src.foregroundMask = srcForegroundMasksIn[srcIdx]; // Dilated mask is used to find pixels 8-connected with the foreground src.foregroundMaskDilated = cv_util::dilate(src.foregroundMask); srcs.push_back(src); } for (int dstIdx = 0; dstIdx < numDsts; ++dstIdx) { Dst dst; dst.color = srcColor(dst2srcIdxs[dstIdx]); // OpenCV: reference count dst.foregroundMask = srcForegroundMask(dst2srcIdxs[dstIdx]); dst.fovMask = dstFovMasksIn[dstIdx]; dst.backgroundDisparity = dstBackgroundDisparitiesIn[dstIdx]; dsts.push_back(dst); } projs.resize(numDsts * numSrcs); createLevelMats(); computeVariances(); } void checkParams() { CHECK_GT(numLevels, 0); CHECK_GE(frameIdx, 0); CHECK_GT(rigSrc.size(), 0); CHECK_GT(rigDst.size(), 0); // Assuming all dst cameras have the same resolution for (int iDst = 1; iDst < int(rigDst.size()); ++iDst) { CHECK_EQ(rigDst[iDst].resolution, rigDst[0].resolution); } // Assuming all src cameras have the same resolution for (int iSrc = 1; iSrc < int(rigSrc.size()); ++iSrc) { CHECK_EQ(rigSrc[iSrc].resolution, rigSrc[0].resolution); } } template <typename T> void createIfEmpty(cv::Mat_<T>& mat, const cv::Size& size, const T& val) { if (mat.empty()) { mat.create(size); mat.setTo(val); } else { CHECK_EQ(mat.size(), size); } } template <typename T> void createOrReleaseMat( cv::Mat_<T>& mat, const bool createOrRelease, // true = create, false = release const cv::Size& size, const T& val) { if (createOrRelease) { createIfEmpty(mat, size, val); } else { mat.release(); } } // createOrRelease: true = create, false = release void createOrReleaseLevelMats(const bool createOrRelease) { const float zeroF = 0.0f; const bool zeroM = false; CHECK_EQ(srcs.size(), rigSrc.size()); for (int srcIdx = 0; srcIdx < int(rigSrc.size()); ++srcIdx) { createOrReleaseMat(srcVariance(srcIdx), createOrRelease, sizeLevel, zeroF); } CHECK_EQ(dsts.size(), rigDst.size()); for (int dstIdx = 0; dstIdx < int(rigDst.size()); ++dstIdx) { createOrReleaseMat(dstDisparity(dstIdx), createOrRelease, sizeLevel, zeroF); createOrReleaseMat(dstMismatchedDisparityMask(dstIdx), createOrRelease, sizeLevel, zeroM); createOrReleaseMat(dstFovMask(dstIdx), createOrRelease, sizeLevel, zeroM); createOrReleaseMat(dstCost(dstIdx), createOrRelease, sizeLevel, zeroF); createOrReleaseMat(dstConfidence(dstIdx), createOrRelease, sizeLevel, zeroF); } } void createLevelMats() { createOrReleaseLevelMats(true); } void computeVariances() { // Variance noise is multiplied by the square of the scale const float scale = float(sizeLevel.width) / heightFullSize; const float scaleVar = math_util::square(scale); varNoiseFloor = std::max(varNoiseFullSize * scaleVar, depth_estimation::kMinVar); ThreadPool threadPool(numThreads); for (int srcIdx = 0; srcIdx < int(rigSrc.size()); ++srcIdx) { threadPool.spawn([&, srcIdx] { // Variance will be used during cost computation, random proposals and // disparity mismatch handling srcVariance(srcIdx) = computeImageVariance(srcColor(srcIdx)); }); } threadPool.join(); } cv::Mat_<PixelType>& srcColor(const int srcId) { return srcs[srcId].color; } const cv::Mat_<PixelType>& srcColor(const int srcId) const { return const_cast<PyramidLevel<PixelType>*>(this)->srcColor(srcId); } cv::Mat_<PixelType>& dstColor(const int dstId) { return dsts[dstId].color; } const cv::Mat_<PixelType>& dstColor(const int dstId) const { return const_cast<PyramidLevel<PixelType>*>(this)->dstColor(dstId); } cv::Mat_<float>& dstDisparity(const int dstId) { return dsts[dstId].disparity; } const cv::Mat_<float>& dstDisparity(const int dstId) const { return const_cast<PyramidLevel<PixelType>*>(this)->dstDisparity(dstId); } cv::Mat_<bool>& dstMismatchedDisparityMask(const int dstId) { return dsts[dstId].mismatchedDisparityMask; } const cv::Mat_<bool>& dstMismatchedDisparityMask(const int dstId) const { return const_cast<PyramidLevel<PixelType>*>(this)->dstMismatchedDisparityMask(dstId); } cv::Mat_<bool>& dstFovMask(const int dstId) { return dsts[dstId].fovMask; } const cv::Mat_<bool>& dstFovMask(const int dstId) const { return const_cast<PyramidLevel<PixelType>*>(this)->dstFovMask(dstId); } cv::Mat_<float>& dstCost(const int dstId) { return dsts[dstId].cost; } const cv::Mat_<float>& dstCost(const int dstId) const { return const_cast<PyramidLevel<PixelType>*>(this)->dstCost(dstId); } cv::Mat_<float>& dstBackgroundDisparity(const int dstId) { return dsts[dstId].backgroundDisparity; } const cv::Mat_<float>& dstBackgroundDisparity(const int dstId) const { return const_cast<PyramidLevel<PixelType>*>(this)->dstBackgroundDisparity(dstId); } cv::Mat_<bool>& srcForegroundMask(const int srcId) { return srcs[srcId].foregroundMask; } const cv::Mat_<bool>& srcForegroundMask(const int srcId) const { return const_cast<PyramidLevel<PixelType>*>(this)->srcForegroundMask(srcId); } cv::Mat_<bool>& srcForegroundMaskDilated(const int srcId) { return srcs[srcId].foregroundMaskDilated; } const cv::Mat_<bool>& srcForegroundMaskDilated(const int srcId) const { return const_cast<PyramidLevel<PixelType>*>(this)->srcForegroundMaskDilated(srcId); } cv::Mat_<bool>& dstForegroundMask(const int dstId) { return dsts[dstId].foregroundMask; } const cv::Mat_<bool>& dstForegroundMask(const int dstId) const { return const_cast<PyramidLevel<PixelType>*>(this)->dstForegroundMask(dstId); } cv::Mat_<float>& srcVariance(const int srcId) { return srcs[srcId].variance; } const cv::Mat_<float>& srcVariance(const int srcId) const { return const_cast<PyramidLevel<PixelType>*>(this)->srcVariance(srcId); } cv::Mat_<float>& dstVariance(const int dstId) { return srcs[dst2srcIdxs[dstId]].variance; } const cv::Mat_<float>& dstVariance(const int dstId) const { return const_cast<PyramidLevel<PixelType>*>(this)->dstVariance(dstId); } cv::Mat_<float>& dstConfidence(const int dstId) { return dsts[dstId].confidence; } const cv::Mat_<float>& dstConfidence(const int dstId) const { return const_cast<PyramidLevel<PixelType>*>(this)->dstConfidence(dstId); } cv::Mat_<int>& dstOverlap(const int dstId) { return dsts[dstId].overlap; } const cv::Mat_<int>& dstOverlap(const int dstId) const { return const_cast<PyramidLevel<PixelType>*>(this)->dstOverlap(dstId); } // Index of src when we have all srcs for each dst int getDstSrcIdx(const int dstId, const int srcId) const { return dstId * rigSrc.size() + srcId; } int getDstSrcIdx(const int dstId) const { return getDstSrcIdx(dstId, dst2srcIdxs[dstId]); } cv::Mat_<cv::Vec2f>& dstProjWarp(const int dstId, const int srcId) { return projs[getDstSrcIdx(dstId, srcId)].projWarp; } const cv::Mat_<cv::Vec2f>& dstProjWarp(const int dstId, const int srcId) const { return const_cast<PyramidLevel<PixelType>*>(this)->dstProjWarp(dstId, srcId); } cv::Mat_<cv::Vec2f>& dstProjWarpInv(const int dstId, const int srcId) { return projs[getDstSrcIdx(dstId, srcId)].projWarpInv; } const cv::Mat_<cv::Vec2f>& dstProjWarpInv(const int dstId, const int srcId) const { return const_cast<PyramidLevel*>(this)->dstProjWarpInv(dstId, srcId); } cv::Mat_<PixelType>& dstProjColor(const int dstId, const int srcId) { return projs[getDstSrcIdx(dstId, srcId)].projColor; } const cv::Mat_<PixelType>& dstProjColor(const int dstId, const int srcId) const { return const_cast<PyramidLevel<PixelType>*>(this)->dstProjColor(dstId, srcId); } cv::Mat_<PixelType>& dstProjColorBias(const int dstId, const int srcId) { return projs[getDstSrcIdx(dstId, srcId)].projColorBias; } const cv::Mat_<PixelType>& dstProjColorBias(const int dstId, const int srcId) const { return const_cast<PyramidLevel<PixelType>*>(this)->dstProjColorBias(dstId, srcId); } cv::Mat_<PixelType>& dstProjColor(const int dstId) { return projs[getDstSrcIdx(dstId)].projColor; } const cv::Mat_<PixelType>& dstProjColor(const int dstId) const { return const_cast<PyramidLevel<PixelType>*>(this)->dstProjColor(dstId); } cv::Mat_<PixelType>& dstProjColorBias(const int dstId) { return projs[getDstSrcIdx(dstId)].projColorBias; } const cv::Mat_<PixelType>& dstProjColorBias(const int dstId) const { return const_cast<PyramidLevel<PixelType>*>(this)->dstProjColorBias(dstId); } void saveDstImage(const int dstIdx, const ImageType imageType, const float scale = 1.0f) { cv::Mat dstImage; switch (imageType) { case ImageType::disparity_levels: dstImage = dstDisparity(dstIdx); break; case ImageType::cost: dstImage = dstCost(dstIdx); break; case ImageType::confidence: dstImage = dstConfidence(dstIdx); break; case ImageType::mismatches: dstImage = overlayMismatchedDstDisparityMask(dstIdx); break; default: CHECK(false) << "unexpected image type " << imageTypes[(int)imageType]; } if (dstImage.empty()) { return; } cv::Mat scaledDstImage = dstImage * scale; if (imageType == ImageType::disparity_levels) { // note: disparity values will be clamped to the [0,1] range (which get scaled // to [0, 2^16 - 1]) and nans will be converted to zero scaledDstImage = cv_util::convertTo<uint16_t>(scaledDstImage); } const std::string& dstId = rigDst[dstIdx].id; const filesystem::path fn = depth_estimation::genFilename(outputDir, imageType, level, dstId, frameName, "png"); cv_util::imwriteExceptionOnFail(fn, scaledDstImage); } void saveDebugImages() { for (int dstIdx = 0; dstIdx < int(rigDst.size()); ++dstIdx) { saveDstImage(dstIdx, ImageType::disparity_levels, 1.0f); saveDstImage(dstIdx, ImageType::cost, depth_estimation::kScaleCostPlot); saveDstImage(dstIdx, ImageType::confidence, depth_estimation::kScaleConfidencePlot); saveDstImage(dstIdx, ImageType::mismatches, depth_estimation::kScaleDisparityPlot); } } cv::Mat_<cv::Vec4f> overlayMismatchedDstDisparityMask(const int dstIdx) { const cv::Mat_<bool>& mask = dstMismatchedDisparityMask(dstIdx); const cv::Mat_<float>& disparity = dstDisparity(dstIdx); const cv::Mat_<bool>& fov = dstFovMask(dstIdx); cv::Mat_<cv::Vec4f> maskedDisparity(disparity.size(), NAN); for (int x = 0; x < maskedDisparity.cols; ++x) { for (int y = 0; y < maskedDisparity.rows; ++y) { if (!fov(y, x)) { continue; } if (mask(y, x)) { maskedDisparity(y, x) = cv::Vec4f(0.f, 0.f, 1.f, 1.f); // red } else { float d = disparity(y, x); maskedDisparity(y, x) = cv::Vec4f(d, d, d, 1.f); } } } return maskedDisparity; } void saveResults(const std::string& outputFormatsStr) { std::vector<std::string> outputFormatsVec; folly::split(",", outputFormatsStr, outputFormatsVec); std::unordered_set<std::string> outputFormats(outputFormatsVec.begin(), outputFormatsVec.end()); const bool saveExr = (outputFormats.find("exr") != outputFormats.end()); const bool savePfm = true; // always save PFM const bool savePng = (outputFormats.find("png") != outputFormats.end()); if (!(saveExr || savePfm || savePng)) { return; } ThreadPool threadPool(numThreads); for (int dstIdx = 0; dstIdx < int(rigDst.size()); ++dstIdx) { threadPool.spawn([&, dstIdx] { const cv::Mat_<float>& disp = dstDisparity(dstIdx); const ImageType imageType = ImageType::disparity_levels; std::map<std::string, bool> types = {{"exr", saveExr}, {"pfm", savePfm}, {"png", savePng}}; for (std::pair<std::string, bool> type : types) { if (!type.second) { continue; } const std::string& t = type.first; const std::string& dstId = rigDst[dstIdx].id; const filesystem::path fn = depth_estimation::genFilename(outputDir, imageType, level, dstId, frameName, t); boost::filesystem::create_directories(fn.parent_path()); if (t == "exr") { cv_util::imwriteExceptionOnFail(fn, disp); } else if (t == "pfm") { cv_util::writeCvMat32FC1ToPFM(fn, disp); } else if (t == "png") { const cv::Mat_<uint16_t> disp16 = cv_util::convertTo<uint16_t>(disp); cv_util::imwriteExceptionOnFail(fn, disp16); } else { CHECK(false) << "Invalid type: " << t; } } }); } threadPool.join(); } }; } // namespace depth_estimation } // namespace fb360_dep

source/depth_estimation/PyramidLevel.h (424 lines of code) (raw):