source/depth_estimation/DerpCLI.cpp (255 lines of code) (raw):

/** * Copyright 2004-present Facebook. All Rights Reserved. * * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. */ #include <random> #include <boost/timer/timer.hpp> #include <glog/logging.h> #include <folly/Format.h> #include <folly/String.h> #include "source/depth_estimation/Derp.h" #include "source/depth_estimation/UpsampleDisparityLib.h" using namespace fb360_dep; using namespace fb360_dep::cv_util; using namespace fb360_dep::depth_estimation; using namespace fb360_dep::image_util; const std::string kUsageMessage = R"( - Runs depth estimation on a set of frames. We assume the inputs have already been resized into the appropriate pyramid level widths before execution. See scripts/render/config.py to see the assumed widths. - Example: ./DerpCLI \ --input_root=/path/to/ \ --output_root=/path/to/output \ --rig=/path/to/rigs/rig.json \ --first=000000 \ --last=000000 )"; DEFINE_string(background_disp, "", "path to background disparities"); DEFINE_string(background_frame, "000000", "background frame (lexical)"); DEFINE_string(cameras, "", "comma-separated destinations to render (empty for all)"); DEFINE_string(color, "", "path to input color images"); DEFINE_bool(do_bilateral_filter, true, "apply bilateral filter at each level"); DEFINE_bool(do_median_filter, true, "apply median filter to disparity at each level"); DEFINE_string(first, "000000", "first frame to process (lexical)"); DEFINE_string(foreground_masks, "", "path to foreground masks"); DEFINE_string(input_root, "", "path to input data (required)"); DEFINE_string(last, "000000", "last frame to process (lexical)"); DEFINE_int32(level_end, -1, "level to end at (-1 = finest)"); DEFINE_int32(level_start, -1, "level to start at (-1 = coarsest)"); DEFINE_double(max_depth_m, 1e4, "max depth (m)"); DEFINE_double(min_depth_m, .50, "min depth (m)"); DEFINE_int32(mismatches_start_level, -1, "(-1 = no mismatch handling)"); DEFINE_int32(num_levels, -1, "number of levels in the pyramid (-1 = uses highest level)"); DEFINE_string(output_formats, "", "saved formats, comma separated (exr, png, pfm supported)"); DEFINE_string(output_root, "", "path to output directory (required)"); DEFINE_bool(partial_coverage, false, "set to true if no 360 coverage"); DEFINE_int32(ping_pong_iterations, 1, "number of spatial propagation iterations"); DEFINE_int32(random_proposals, 2, "number of proposed random disparities before propagation"); DEFINE_int32(resolution, 2048, "Output resolution (width in pixels)"); DEFINE_string(rig, "", "path to camera rig .json"); DEFINE_bool(save_debug_images, false, "if true, save debugging output images"); DEFINE_int32(threads, -1, "number of threads (-1 = auto, 0 = none)"); DEFINE_bool(use_foreground_masks, false, "use pre-computed foreground masks"); DEFINE_double(var_high_thresh, 1e-3, "ignore variances higher than this threshold"); DEFINE_double(var_noise_floor, 4e-5, "noise variance floor on original, full-size images"); void verifyInputs() { CHECK_NE(FLAGS_input_root, ""); CHECK_NE(FLAGS_output_root, ""); if (FLAGS_level_start >= 0 && FLAGS_level_end >= 0) { CHECK_GE(FLAGS_level_start, FLAGS_level_end); } if (FLAGS_rig.empty()) { FLAGS_rig = FLAGS_input_root + "/rigs/rig_calibrated.json"; } if (FLAGS_color.empty()) { FLAGS_color = getImageDir(FLAGS_input_root, ImageType::color_levels).string(); } if (FLAGS_background_disp.empty()) { FLAGS_background_disp = getImageDir(FLAGS_input_root, ImageType::background_disp_levels).string(); } if (FLAGS_foreground_masks.empty()) { FLAGS_foreground_masks = getImageDir(FLAGS_input_root, ImageType::foreground_masks_levels).string(); } // Check flag values CHECK_GE(FLAGS_random_proposals, 0); CHECK_LE(FLAGS_first, FLAGS_last); const bool hasColorImages = filesystem::is_directory(FLAGS_color); CHECK(hasColorImages) << "No images in " << FLAGS_color; if (FLAGS_use_foreground_masks) { const bool hasBackgroundDisps = filesystem::is_directory(FLAGS_background_disp); CHECK(hasBackgroundDisps) << "Asked to use background but no background disparities found in " << FLAGS_background_disp; const bool hasDstForegroundMasks = filesystem::is_directory(FLAGS_foreground_masks); CHECK(hasDstForegroundMasks) << "Asked to use foreground masks but no foreground masks found in " << FLAGS_foreground_masks; } std::vector<std::string> outputFormats; folly::split(",", FLAGS_output_formats, outputFormats); for (std::string& outputFormat : outputFormats) { // We allow size 0 inputs to ensure stray commas are ignored, i.e. exr,,png is fine CHECK( outputFormat.size() == 0 || outputFormat == "exr" || outputFormat == "png" || outputFormat == "pfm") << "Invalid output format specified: " << outputFormat; } } filesystem::path getLevelDisparityDir(const int level) { return getImageDir(FLAGS_output_root, ImageType::disparity_levels, level); } filesystem::path getLevelColorDir(const int level) { return folly::sformat("{}/level_{}", FLAGS_color, std::to_string(level)); } filesystem::path getLevelForegroundMasksDir(const int level) { return folly::sformat("{}/level_{}", FLAGS_foreground_masks, std::to_string(level)); } filesystem::path getLevelBackgroundDisparityDir(const int level) { return folly::sformat("{}/level_{}", FLAGS_background_disp, std::to_string(level)); } // Verifies that we have all the frames we are asking for void verifyInputImagePaths( const Camera::Rig& rigSrc, const Camera::Rig& rigDst, const int numLevels) { const int levelStart = FLAGS_level_start >= 0 ? FLAGS_level_start : numLevels - 1; verifyImagePaths(getLevelColorDir(levelStart), rigSrc, FLAGS_first, FLAGS_last); if (FLAGS_use_foreground_masks) { // We need just one background disparity, with one background mask per camera and frame verifyImagePaths( getLevelBackgroundDisparityDir(levelStart), rigDst, FLAGS_background_frame, FLAGS_background_frame); verifyImagePaths(getLevelForegroundMasksDir(levelStart), rigDst, FLAGS_first, FLAGS_last); } if (levelStart < numLevels - 1) { verifyImagePaths(getLevelDisparityDir(levelStart + 1), rigDst, FLAGS_first, FLAGS_last); } } int getLevelEnd(const std::map<int, cv::Size>& pyramidLevelSizes) { int levelEnd = 0; for (const auto& levelSize : pyramidLevelSizes) { if (levelSize.second.width <= FLAGS_resolution) { levelEnd = levelSize.first; break; } } if (FLAGS_level_end >= 0) { CHECK_GE(FLAGS_level_end, levelEnd) << folly::sformat( "Requested end level {} ({}), which is larger than requested resolution ({})", FLAGS_level_end, pyramidLevelSizes.at(FLAGS_level_end).width, FLAGS_resolution); } levelEnd = std::max(levelEnd, FLAGS_level_end); return levelEnd; } int main(int argc, char* argv[]) { system_util::initDep(argc, argv, kUsageMessage); boost::timer::cpu_timer matchTimer; verifyInputs(); Camera::Rig rigSrc = Camera::loadRig(FLAGS_rig); const int numSrcs = rigSrc.size(); CHECK_GT(numSrcs, 0) << "no source cameras!"; Camera::Rig rigDst = filterDestinations(rigSrc, FLAGS_cameras); const int numDsts = rigDst.size(); CHECK_GT(numDsts, 0) << "no destination cameras!"; const std::vector<int> dst2srcIdxs = mapSrcToDstIndexes(rigSrc, rigDst); // Get pyramid level sizes from both the disparity and color directories std::map<int, cv::Size> pyramidLevelSizes; getPyramidLevelSizes(pyramidLevelSizes, FLAGS_color); getPyramidLevelSizes( pyramidLevelSizes, getImageDir(FLAGS_output_root, ImageType::disparity_levels)); const int numLevels = FLAGS_num_levels == -1 ? pyramidLevelSizes.rbegin()->first + 1 : FLAGS_num_levels; // Get largest level smaller or equal to the requested resolution const int levelStart = FLAGS_level_start >= 0 ? FLAGS_level_start : numLevels - 1; const int levelEnd = getLevelEnd(pyramidLevelSizes); CHECK_LE(FLAGS_level_start, numLevels); const int numFrames = std::stoi(FLAGS_last) - std::stoi(FLAGS_first) + 1; verifyInputImagePaths(rigSrc, rigDst, numLevels); filesystem::create_directories(FLAGS_output_root); // These must be computed before normalizing to determine the correct resolutions const Camera& camRef = rigDst[0]; const int widthFullSize = camRef.resolution.x(); const int heightFullSize = camRef.resolution.y(); // Normalize cameras (needed to generate FOV masks and to process frames) Camera::normalizeRig(rigSrc); Camera::normalizeRig(rigDst); for (int level = levelStart; level >= levelEnd; --level) { // Create level output directories createLevelOutputDirs(FLAGS_output_root, level, rigDst, FLAGS_save_debug_images); // Create dst FOV masks for current level size const cv::Size& sizeLevel = pyramidLevelSizes.at(level); const std::vector<cv::Mat_<bool>> dstFovMasks = generateFovMasks(rigDst, sizeLevel, FLAGS_threads); for (int iFrame = 0; iFrame < numFrames; ++iFrame) { // Load current level data const std::string frameName = image_util::intToStringZeroPad(iFrame + std::stoi(FLAGS_first), 6); // Color std::vector<cv::Mat_<PixelType>> colorImagesLevel = loadLevelImages<PixelType>(FLAGS_color, level, rigSrc, frameName, FLAGS_threads); // Foreground masks std::vector<cv::Mat_<bool>> srcForegroundMasksLevel = FLAGS_use_foreground_masks ? loadLevelImages<bool>(FLAGS_foreground_masks, level, rigSrc, frameName, FLAGS_threads) : cv_util::generateAllPassMasks(sizeLevel, numSrcs); // Background disparities std::vector<cv::Mat_<float>> dstBackgroundDisparitiesLevel(rigDst.size()); if (FLAGS_use_foreground_masks) { dstBackgroundDisparitiesLevel = loadLevelImages<float>( FLAGS_background_disp, level, rigDst, FLAGS_background_frame, FLAGS_threads); } PyramidLevel<PixelType> framePyramidLevel( iFrame, frameName, numFrames, level, numLevels, pyramidLevelSizes, rigSrc, rigDst, dst2srcIdxs, colorImagesLevel, srcForegroundMasksLevel, dstFovMasks, dstBackgroundDisparitiesLevel, widthFullSize, heightFullSize, FLAGS_color, FLAGS_var_noise_floor, FLAGS_var_high_thresh, FLAGS_use_foreground_masks, FLAGS_output_root, FLAGS_threads); // Generate/link reprojections precomputeProjections(framePyramidLevel, FLAGS_threads); if (level < numLevels - 1) { // Allocate masks but only populate them if needed std::vector<cv::Mat_<bool>> dstForegroundMasksLevel(numDsts); std::vector<cv::Mat_<bool>> dstForegroundMasksCoarse(numDsts); if (FLAGS_use_foreground_masks) { dstForegroundMasksLevel = loadLevelImages<bool>( FLAGS_foreground_masks, level, rigDst, frameName, FLAGS_threads); dstForegroundMasksCoarse = loadLevelImages<bool>( FLAGS_foreground_masks, level + 1, rigDst, frameName, FLAGS_threads); } const std::vector<cv::Mat_<float>> dstDispsCoarse = loadImages<float>(getLevelDisparityDir(level + 1), rigDst, frameName, FLAGS_threads); const std::vector<cv::Mat_<float>> dstDispsNextLevel = upsampleDisparities( rigDst, dstDispsCoarse, dstBackgroundDisparitiesLevel, dstForegroundMasksCoarse, dstForegroundMasksLevel, sizeLevel, FLAGS_use_foreground_masks, FLAGS_threads); for (int dstIdx = 0; dstIdx < numDsts; ++dstIdx) { framePyramidLevel.dsts[dstIdx].disparity = dstDispsNextLevel[dstIdx]; } } processLevel( framePyramidLevel, FLAGS_output_formats, FLAGS_use_foreground_masks, FLAGS_output_root, FLAGS_random_proposals, FLAGS_partial_coverage, FLAGS_min_depth_m, FLAGS_max_depth_m, FLAGS_do_median_filter, FLAGS_save_debug_images, FLAGS_ping_pong_iterations, FLAGS_mismatches_start_level, FLAGS_do_bilateral_filter, FLAGS_threads); } LOG(INFO) << folly::sformat("-- Elapsed time: {}", matchTimer.format()); } LOG(INFO) << folly::sformat("-- TOTAL: {}", matchTimer.format()); return EXIT_SUCCESS; }