// source/calibration/GeometricCalibration.cpp (1,048 lines of code) (raw):

/**
 * Copyright 2004-present Facebook. All Rights Reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include "source/calibration/GeometricCalibration.h"

#include <algorithm>
#include <array>
#include <future>
#include <iostream>
#include <numeric>
#include <random>
#include <sstream>
#include <thread>
#include <unordered_map>
#include <unordered_set>

#include <boost/algorithm/string.hpp>
#include <boost/timer/timer.hpp>
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <opencv2/opencv.hpp>

#include <folly/FileUtil.h>
#include <folly/Format.h>
#include <folly/dynamic.h>
#include <folly/json.h>

#include "source/calibration/Calibration.h"
#include "source/util/Camera.h"
#include "source/util/CvUtil.h"
#include "source/util/MathUtil.h"
#include "source/util/ThreadPool.h"

using namespace fb360_dep;
using namespace fb360_dep::calibration;

DEFINE_int32(cap_traces, 0, "speed up solver by capping the number of traces");
DEFINE_double(ceres_function_tolerance, 1e-6, "ceres function tolerance");
DEFINE_int32(
    ceres_threads,
    -1,
    "number of threads used by ceres. requires compiled support for multithreading (default 1)");
DEFINE_string(debug_dir, "", "path to debug output");
DEFINE_double(debug_error_scale, 0, "show scaled reprojection errors");
DEFINE_double(debug_matches_overlap, 1, "show matches if overlap exceeds this fraction");
DEFINE_bool(
    dir_per_frame,
    false,
    "is there a directory per frame?\n"
    "i.e. is an image path of the form: \n"
    "  <frame index>/ ... /<camera id>.<extension>\n"
    "  e.g. 1/cam2.bmp or 000001/isp_out/cam14.png\n"
    "the default is a directory per camera. i.e. an image is of the form:\n"
    "  .../<camera id>/<frame index>.<extension>\n"
    "  e.g. cam2/123.bmp or rgb/cam14/000123.png");
DEFINE_bool(discard_outside_fov, true, "discard matches outside fov");
DEFINE_string(errors_dir, "", "directory where errors will be saved");
DEFINE_int32(experiments, 1, "calibrate multiple times");
DEFINE_bool(force_in_front, true, "no intersections behind camera");
DEFINE_bool(keep_invalid_traces, false, "keep traces with multiple points from the same camera");
// fixed typo in help text: "distorion" -> "distortion"
DEFINE_bool(lock_distortion, true, "lock the distortion");
DEFINE_bool(lock_focal, false, "lock the focal");
DEFINE_bool(lock_positions, true, "don't calibrate position");
DEFINE_bool(lock_principals, false, "don't calibrate principals");
DEFINE_bool(lock_rotations, false, "don't calibrate rotation");
DEFINE_double(max_error, 0.5, "maximum allowable error for calibration to be valid");
DEFINE_int32(min_traces, 10, "minimum number of traces for camera to be sufficiently constrained");
DEFINE_double(outlier_factor, 5, "reject if error is factor * median");
DEFINE_double(
    outlier_z_threshold,
    3,
    "z score threshold on traces to consider a camera an outlier");
DEFINE_int32(pass_count, 10, "number of passes");
// fixed typos in help text: "pertub" -> "perturb"
DEFINE_double(perturb_focals, 0, "perturb focals (pixels / radian)");
DEFINE_double(perturb_positions, 0, "perturb positions (m)");
DEFINE_double(perturb_principals, 0, "perturb principals (pixels)");
DEFINE_double(perturb_rotations, 0, "perturb rotations (radians)");
DEFINE_int32(point_count, 10000, "artificial points to generate");
DEFINE_double(point_error_stddev, 0.5, "error added to artificial points");
DEFINE_double(point_min_dist, 1, "minimum distance of artificial points");
DEFINE_string(points_file, "", "path to output calibration points file, default next to output");
DEFINE_string(
    points_file_json,
    "",
    "path to output calibration points file including reference points, default next to output");
DEFINE_string(reference_camera, "", "reference camera to lock if positions are unlocked");
DEFINE_double(
    remove_sparse_overlaps,
    0,
    "reject overlaps with fewer than this fraction of the average match count");
DEFINE_bool(report_per_camera_errors, false, "per camera reprojection error statistics");
DEFINE_bool(robust, true, "use Huber loss function");
DEFINE_int32(seed, -1, "seed for random number generator");
DEFINE_bool(shared_distortion, true, "all cameras in a group share the same distortion");
DEFINE_bool(
    shared_principal_and_focal,
    false,
    "all cameras in a group share the same focal, principal");
DEFINE_bool(
    weight_by_trace_count,
    false,
    "weight the residual error by the number of traces per camera");
DEFINE_bool(weighted_statistics, false, "compute statistics of weighted residuals");

// File-scope lookup tables from camera id / camera group to the camera's index
// in the rig vector. Filled once by buildCameraIndexMaps().
std::unordered_map<std::string, int> cameraIdToIndex;
std::unordered_map<std::string, int> cameraGroupToIndex;

// Human-readable description of the expected image-path layout, used in error
// messages (the layout depends on --dir_per_frame).
std::string imageIdFormat() {
  return FLAGS_dir_per_frame ? "<frame index>/ ... /<camera id>.<extension>"
                             : ".../<camera id>/<frame index>.<extension>";
}

// Populate the camera id -> index and group -> index maps from the rig.
void buildCameraIndexMaps(const Camera::Rig& rig) {
  for (int i = 0; i < int(rig.size()); ++i) {
    cameraIdToIndex[rig[i].id] = i;
    cameraGroupToIndex[rig[i].group] = i; // last camera in group wins
  }
}

using ImageId = std::string;

// Extract the camera id from an image path (layout per --dir_per_frame).
std::string getCameraId(const ImageId& image) {
  // image is actually a path
  filesystem::path path = image;
  if (FLAGS_dir_per_frame) {
    return path.stem().native();
  }
  return path.parent_path().filename().native();
}

// Extract the frame index from an image path (layout per --dir_per_frame).
int getFrameIndex(const ImageId& image) {
  // image is actually a path
  filesystem::path path = image;
  if (FLAGS_dir_per_frame) {
    return std::stoi(path.begin()->native());
  }
  return std::stoi(path.stem().native());
}

// True if the image's camera id is part of the rig.
bool hasCameraIndex(const ImageId& image) {
  return cameraIdToIndex.count(getCameraId(image));
}

// Rig index for the image's camera id; throws if unknown (uses .at()).
int getCameraIndex(const ImageId& image) {
  return cameraIdToIndex.at(getCameraId(image));
}

// create a string that adheres to the format of an image path
ImageId makeArtificialPath(int frame, const std::string& cameraId) {
  if (FLAGS_dir_per_frame) {
    return std::to_string(frame) + "/" + cameraId;
  }
  return cameraId + "/" + std::to_string(frame);
}

// input path includes basename and extension
cv::Mat_<cv::Vec3w> loadImage(const filesystem::path& path) {
  const filesystem::path colorDir = FLAGS_color;
  return cv_util::loadImage<cv::Vec3w>(colorDir / path);
}

// Read and parse a JSON file; CHECK-fails if the file is empty or unreadable.
folly::dynamic parseJsonFile(const std::string& path) {
  std::string json;
  folly::readFile(path.c_str(), json);
  CHECK(!json.empty()) << "could not read JSON file: " << path;
  return folly::parseJson(json);
}

// a feature is a point in an image
struct Feature {
  Camera::Vector2 position; // position of the feature in its image, in pixels
  int trace; // index of trace that feature belongs to (or -1 if none)

  Feature(const Camera::Vector2& position) : position(position), trace(-1) {}
};

// a featuremap holds, for each image, a vector of its features
using FeatureMap = std::unordered_map<ImageId, std::vector<Feature>>;

// an overlap is a pair of images and the matches between their features
struct Overlap {
  std::array<ImageId, 2> images;
  // each match is a pair of indexes into the two images' feature vectors
  std::vector<std::array<size_t, 2>> matches;

  Overlap(const ImageId& image0, const ImageId& image1) {
    images[0] = image0;
    images[1] = image1;
  }

  // true when both images come from the same frame index
  bool isIntraFrame() const {
    return getFrameIndex(images[0]) == getFrameIndex(images[1]);
  }
};

// a trace is a world coordinate and a list of observations that reference it
struct Trace {
  Camera::Vector3 position;
  // (image, feature index) pairs that observe this world point
  std::vector<std::pair<ImageId, int>> references;

  void add(const ImageId& image, const int index) {
    references.emplace_back(image, index);
  }

  // inherit trace's references; <me> is this trace's own index, written into
  // each transferred feature's trace field. The donor trace is left empty.
  void inherit(Trace& trace, FeatureMap& featureMap, int me) {
    for (const auto& ref : trace.references) {
      featureMap[ref.first][ref.second].trace = me;
    }
    references.insert(references.end(), trace.references.begin(), trace.references.end());
    trace.references.clear();
  }

  // detach all referencing features (trace = -1) and empty this trace
  void clear(FeatureMap& featureMap) {
    for (const auto& ref : references) {
      featureMap[ref.first][ref.second].trace = -1;
    }
    references.clear();
  }
};

/* Parse features from <parsed> JSON which has structure
   { "images": { image_name: [{"x": x, "y": y, ...}] }, ... }
   where image_name is defined as in imageIdFormat, above. */
FeatureMap loadFeatureMap(const folly::dynamic& parsed) {
  FeatureMap result;
  for (const auto& image : parsed["images"].items()) {
    const ImageId path = image.first.getString();
    if (!hasCameraIndex(path)) {
      LOG(INFO) << folly::sformat("ignoring image id {}", path);
      continue;
    }
    std::vector<Feature>& features = result[path];
    for (const auto& feature : image.second) {
      features.emplace_back(Camera::Vector2(feature["x"].asDouble(), feature["y"].asDouble()));
    }
  }
  CHECK(!result.empty()) << "verify image id format: " << imageIdFormat();
  LOG(INFO) << folly::sformat("{} images loaded", result.size());
  return result;
}

/* Parse matches from <parsed> JSON which has structure
   { "all_matches": [{
       "image1": image_name1,
       "image2": image_name1,
       "matches": [{ "idx1": idx1, "idx2": idx2 }] }], ... } */
std::vector<Overlap> loadOverlaps(const folly::dynamic& parsed) {
  std::vector<Overlap> result;
  size_t count = 0;
  for (const auto& overlap : parsed["all_matches"]) {
    ImageId path0 = overlap["image1"].getString();
    ImageId path1 = overlap["image2"].getString();
    if (!hasCameraIndex(path0) || !hasCameraIndex(path1)) {
      continue;
    }
    result.emplace_back(path0, path1);
    for (const auto& match : overlap["matches"]) {
      // A threshold of 0 indicates that score should be ignored
      // Check if score should be ignored before attempting to access
      // match["score"] since it might not be defined in the json
      if (FLAGS_match_score_threshold == 0 ||
          FLAGS_match_score_threshold <= match["score"].asDouble()) {
        result.back().matches.push_back(
            {{size_t(match["idx1"].asInt()), size_t(match["idx2"].asInt())}});
      }
    }
    count += 2 * result.back().matches.size();
  }
  LOG(INFO) << folly::sformat("{} feature observations loaded", count);
  return result;
}

// Find the overlap for the ordered pair (i0, i1), creating it if absent.
// CHECK-fails if the reversed pair (i1, i0) is already present.
Overlap& findOrAddOverlap(std::vector<Overlap>& overlaps, const ImageId& i0, const ImageId& i1) {
  for (Overlap& overlap : overlaps) {
    if (overlap.images[0] == i0 && overlap.images[1] == i1) {
      return overlap;
    }
    // make sure we don't have the image pair (i1, i0) in there
    CHECK(overlap.images[0] != i1 || overlap.images[1] != i0);
  }
  // image pair (i0, i1) not found: add it
  overlaps.emplace_back(i0, i1);
  return overlaps.back();
}

// Gaussian pixel noise (stddev = --point_error_stddev) added to synthetic keypoints.
template <typename T>
Camera::Vector2 keypointError(T& gen) {
  std::normal_distribution<> rng(0, FLAGS_point_error_stddev);
  return {rng(gen), rng(gen)};
}

// Generate --point_count random world points, project them into every camera
// that sees them (with noise), and record pairwise matches between those
// cameras. All artificial images use frame 0.
// NOTE(review): the name's "Artifical" spelling is kept for compatibility
// with callers elsewhere in the file.
void generateArtificalPoints(
    FeatureMap& featureMap,
    std::vector<Overlap>& overlaps,
    const std::vector<Camera>& cameras) {
  std::mt19937 mt;
  for (int p = 0; p < FLAGS_point_count; ++p) {
    // create a random unit vector
    double longitude = std::uniform_real_distribution<>(-M_PI, +M_PI)(mt);
    double z = std::uniform_real_distribution<>(-1, 1)(mt);
    Camera::Vector3 rig(sqrt(1 - z * z) * cos(longitude), sqrt(1 - z * z) * sin(longitude), z);
    CHECK_NEAR(rig.squaredNorm(), 1, 0.001);
    // divide unit vector by random disparity
    rig /= std::uniform_real_distribution<>(0, 1 / FLAGS_point_min_dist)(mt);
    // add keypoint to every camera that sees rig
    std::vector<ImageId> images;
    for (const Camera& camera : cameras) {
      if (camera.sees(rig)) {
        ImageId image = makeArtificialPath(0, camera.id);
        featureMap[image].emplace_back(camera.pixel(rig) + keypointError(mt));
        images.push_back(image);
      }
    }
    // add a match for every pair of cameras that see rig
    for (int index1 = 0; index1 < int(images.size()); ++index1) {
      const ImageId& i1 = images[index1];
      for (int index0 = 0; index0 < index1; ++index0) {
        const ImageId& i0 = images[index0];
        Overlap& overlap = findOrAddOverlap(overlaps, i0, i1);
        overlap.matches.push_back({{featureMap[i0].size() - 1, featureMap[i1].size() - 1}});
      }
    }
  }
}

// Triangulate a world point from observations (honors --force_in_front).
Camera::Vector3 triangulate(const Observations& observations) {
  return triangulateNonlinear(observations, FLAGS_force_in_front);
}

// return reprojection errors for each camera
// Note: the returned values are SQUARED pixel distances; only intra-frame
// overlaps contribute. Matched features without a trace are triangulated
// from the two observations on the fly.
std::vector<std::vector<Camera::Real>> reprojectionErrors(
    const std::vector<Overlap>& overlaps,
    const FeatureMap& featureMap,
    const std::vector<Trace>& traces,
    const std::vector<Camera>& cameras) {
  std::vector<std::vector<Camera::Real>> errors(ssize(cameras));
  for (const Overlap& overlap : overlaps) {
    if (!overlap.isIntraFrame()) {
      continue;
    }
    const std::reference_wrapper<const ImageId> images[2] = {overlap.images[0], overlap.images[1]};
    const int idxs[2] = {getCameraIndex(images[0]), getCameraIndex(images[1])};
    const std::reference_wrapper<const std::vector<Feature>> features[2] = {
        featureMap.at(images[0]), featureMap.at(images[1])};
    for (const auto& match : overlap.matches) {
      std::array<const Feature, 2> kps = {
          {features[0].get()[match[0]], features[1].get()[match[1]]}};
      CHECK_EQ(kps[0].trace, kps[1].trace) << "matching features belong to different traces";
      Camera::Vector3 rig = kps[0].trace < 0
          ? triangulate({{cameras[idxs[0]], kps[0].position}, {cameras[idxs[1]], kps[1].position}})
          : traces[kps[0].trace].position;
      for (ssize_t i = 0; i < ssize(match); ++i) {
        const Camera::Vector2 pixel = cameras[idxs[i]].pixel(rig);
        errors[idxs[i]].push_back((pixel - kps[i].position).squaredNorm());
      }
    }
  }
  return errors;
}

// report reprojection errors for each camera
// Logs the 50/90/99th percentile pixel error per camera (sqrt of the squared
// errors). No-op unless --report_per_camera_errors is set.
void reportReprojectionErrors(
    const std::vector<Overlap>& overlaps,
    const FeatureMap& featureMap,
    const std::vector<Trace>& traces,
    const std::vector<Camera>& cameras) {
  if (!FLAGS_report_per_camera_errors) {
    return;
  }
  std::vector<std::vector<Camera::Real>> errors =
      reprojectionErrors(overlaps, featureMap, traces, cameras);
  for (ssize_t i = 0; i < ssize(cameras); ++i) {
    // guard: a camera with no intra-frame matches has no percentiles to
    // report; indexing errors[i][0] would be out of bounds
    if (errors[i].empty()) {
      LOG(INFO) << folly::sformat("{}: 0 reproj. percentile", cameras[i].id);
      continue;
    }
    std::sort(errors[i].begin(), errors[i].end());
    std::ostringstream line;
    for (int percentile : {50, 90, 99}) {
      int index = percentile * (ssize(errors[i]) - 1) / 100.0 + 0.5;
      line << folly::format("{}%: {:.2f} ", percentile, sqrt(errors[i][index]));
    }
    LOG(INFO) << folly::sformat(
        "{}: {} reproj. percentile {}", cameras[i].id, ssize(errors[i]), line.str());
  }
}

// Remove matches whose reprojection error exceeds outlierFactor * the
// per-camera median (in the squared-error domain) in BOTH endpoint cameras;
// a match is kept if either camera considers it an inlier.
void removeOutliersFromCameras(
    std::vector<Overlap>& overlaps,
    const FeatureMap& featureMap,
    const std::vector<Trace>& traces,
    const std::vector<Camera>& cameras,
    const Camera::Real outlierFactor) {
  // compute reprojection errors for each camera
  std::vector<std::vector<Camera::Real>> errors =
      reprojectionErrors(overlaps, featureMap, traces, cameras);
  // compute median for each camera
  std::vector<Camera::Real> medians(ssize(errors));
  for (ssize_t i = 0; i < ssize(errors); ++i) {
    medians[i] = calcPercentile(errors[i]);
  }
  std::unordered_map<ImageId, int> outliers;
  // remove matches that neither endpoint wants to keep around
  // errorIdxs tracks our position in each camera's error list; errors were
  // generated in the exact same iteration order as this loop consumes them
  std::vector<ssize_t> errorIdxs(ssize(errors), 0);
  int total = 0;
  int inlierTotal = 0;
  for (Overlap& overlap : overlaps) {
    if (!overlap.isIntraFrame()) {
      continue;
    }
    const std::reference_wrapper<const ImageId> images[2] = {overlap.images[0], overlap.images[1]};
    const int idxs[2] = {getCameraIndex(images[0]), getCameraIndex(images[1])};
    // move the inliers to front of overlap.matches and resize to fit
    int inliers = 0;
    for (const auto& match : overlap.matches) {
      bool inlier = false;
      for (ssize_t i = 0; i < ssize(match); ++i) {
        int cameraIndex = idxs[i];
        if (errors[cameraIndex][errorIdxs[cameraIndex]++] <
            medians[cameraIndex] * outlierFactor) {
          inlier = true;
        }
      }
      if (inlier) {
        overlap.matches[inliers++] = match;
      }
    }
    outliers[overlap.images[0]] += ssize(overlap.matches) - inliers;
    outliers[overlap.images[1]] += ssize(overlap.matches) - inliers;
    total += ssize(overlap.matches);
    overlap.matches.resize(inliers);
    inlierTotal += inliers;
  }
  // sanity check that we consumed all the errors
  for (ssize_t i = 0; i < ssize(errors); ++i) {
    CHECK_EQ(errorIdxs[i], ssize(errors[i]));
  }
  if (FLAGS_log_verbose) {
    for (const auto& p : outliers) {
      LOG(INFO) << folly::sformat("Removed {} outliers from {}", p.second, p.first);
    }
  }
  LOG(INFO) << folly::sformat("{} of {} matches were inliers", inlierTotal, total);
}

// Clear any trace that references the same image more than once (a single
// image cannot legitimately observe the same world point twice).
void removeInvalidTraces(std::vector<Trace>& traces, FeatureMap& featureMap) {
  int total = 0;
  int removed = 0;
  for (Trace& trace : traces) {
    if (!trace.references.empty()) {
      ++total;
    }
    std::unordered_set<ImageId> uniqueCameras;
    for (const auto& ref : trace.references) {
      if (!uniqueCameras.insert(ref.first).second) {
        // image referenced more than once, remove the trace
        trace.clear(featureMap);
        ++removed;
        break;
      }
    }
  }
  LOG(INFO) << folly::sformat("removed {} out of {} traces", removed, total);
}

// Worker: triangulate traces[begin, end) from their observations.
void triangulateTracesThread(
    std::vector<Trace>& traces,
    const size_t begin,
    const size_t end,
    const FeatureMap& featureMap,
    const std::vector<Camera>& cameras) {
  for (size_t i = begin; i < end; ++i) {
    Trace& trace = traces[i];
    if (!trace.references.empty()) {
      Observations observations;
      for (const auto& ref : trace.references) {
        const Feature& feature = featureMap.at(ref.first)[ref.second];
        const Camera& camera = cameras[getCameraIndex(ref.first)];
        observations.emplace_back(camera, feature.position);
      }
      trace.position = triangulate(observations);
    }
  }
}

// Triangulate all traces, splitting the work evenly across --threads threads.
void triangulateTraces(
    std::vector<Trace>& traces,
    const FeatureMap& featureMap,
    const std::vector<Camera>& cameras) {
  const int threadCount = ThreadPool::getThreadCountFromFlag(FLAGS_threads);
  std::vector<std::thread> threads;
  for (int thread = 0; thread < threadCount; ++thread) {
    threads.emplace_back(
        triangulateTracesThread,
        std::ref(traces),
        thread * traces.size() / threadCount,
        (thread + 1) * traces.size() / threadCount,
        std::cref(featureMap),
        std::cref(cameras));
  }
  for (std::thread& thread : threads) {
    thread.join();
  }
}

// Union-find-style grouping of matched features into traces: each match either
// starts a new trace, joins an existing one, or merges two traces.
std::vector<Trace> assembleTraces(FeatureMap& featureMap, const std::vector<Overlap>& overlaps) {
  // mark all features as unreferenced
  for (auto& features : featureMap) {
    for (Feature& feature : features.second) {
      feature.trace = -1;
    }
  }
  std::vector<Trace> traces;
  int nonemptyTraceCount = 0;
  for (const Overlap& overlap : overlaps) {
    const std::reference_wrapper<std::vector<Feature>> features[2] = {
        featureMap[overlap.images[0]], featureMap[overlap.images[1]]};
    for (const auto& match : overlap.matches) {
      const std::reference_wrapper<int> indexes[2] = {
          features[0].get()[match[0]].trace, features[1].get()[match[1]].trace};
      if (indexes[0] < 0 && indexes[1] < 0) {
        // neither belongs to a trace, start new trace
        traces.emplace_back();
        nonemptyTraceCount++;
        for (ssize_t i = 0; i < ssize(indexes); ++i) {
          indexes[i].get() = traces.size() - 1;
          traces[indexes[i]].add(overlap.images[i], match[i]);
        }
      } else if (indexes[0] < 0) {
        // 0 does not belong to a trace, add to 1's trace
        indexes[0].get() = indexes[1];
        traces[indexes[0]].add(overlap.images[0], match[0]);
      } else if (indexes[1] < 0) {
        // 1 does not belong to a trace, add to 0's trace
        indexes[1].get() = indexes[0];
        traces[indexes[1]].add(overlap.images[1], match[1]);
      } else if (indexes[0] != indexes[1]) {
        // merge two traces, 0 inherits 1's references
        traces[indexes[0]].inherit(traces[indexes[1]], featureMap, indexes[0]);
        --nonemptyTraceCount;
      }
    }
  }
  LOG(INFO) << folly::sformat("found {} nonempty traces", nonemptyTraceCount);
  return traces;
}

// 50/50 blend of two images; an empty mat0 is treated as black.
cv::Mat_<cv::Vec3w> blend(const cv::Mat_<cv::Vec3w>& mat0, const cv::Mat_<cv::Vec3w>& mat1) {
  if (mat0.empty()) {
    return 0.5 * mat1;
  }
  cv::Mat_<cv::Vec3w> result;
  cv::addWeighted(mat0, 0.5, mat1, 0.5, 0, result);
  return result;
}

// draw a line that starts out red, ends up green
// (r -> m in red, then m -> g in green)
void drawRedGreenLine(
    cv::Mat_<cv::Vec3w>& dst,
    const Camera::Vector2& r,
    const Camera::Vector2& g,
    const Camera::Vector2& m) {
  const cv::Scalar red = cv_util::createBGR<cv::Vec3w>(0, 0, 1);
  const cv::Scalar green = cv_util::createBGR<cv::Vec3w>(0, 1, 0);
  cv::line(dst, cv::Point2f(r.x(), r.y()), cv::Point2f(m.x(), m.y()), red, 2);
  cv::line(dst, cv::Point2f(g.x(), g.y()), cv::Point2f(m.x(), m.y()), green, 2);
}

// Reproject srcImage into dst's image plane assuming points at infinity,
// sampling src with truncation (nearest-floor). Pixels dst sees but src does
// not are black; if srcImage is empty, visible pixels are white.
template <typename T>
cv::Mat_<T> projectImageBetweenCamerasNearest(
    const Camera& dst,
    const Camera& src,
    const cv::Mat_<T>& srcImage) {
  cv::Mat_<T> dstImage(cv::Size(dst.resolution.x(), dst.resolution.y()));
  for (int y = 0; y < dstImage.rows; ++y) {
    for (int x = 0; x < dstImage.cols; ++x) {
      Camera::Vector3 rig = dst.rigNearInfinity({x + 0.5, y + 0.5});
      Camera::Vector2 srcPixel;
      if (src.sees(rig, srcPixel)) {
        dstImage(y, x) = srcImage.empty() ? cv_util::createBGR<T>(1, 1, 1)
                                          : srcImage(srcPixel.y(), srcPixel.x());
      } else {
        dstImage(y, x) = cv_util::createBGR<T>(0, 0, 0);
      }
    }
  }
  return dstImage;
}

// Debug visualization: overlay image1 reprojected into image0's space and
// draw each match as a red/green line via its (triangulated) world point.
cv::Mat_<cv::Vec3w> renderOverlap(
    const Overlap& overlap,
    const FeatureMap& featureMap,
    const std::vector<Trace>& traces,
    const std::vector<Camera>& cameras) {
  // transform image 1 into image 0's space and overlay the two
  const ImageId& image0 = overlap.images[0];
  const ImageId& image1 = overlap.images[1];
  const Camera& camera0 = cameras[getCameraIndex(image0)];
  const Camera& camera1 = cameras[getCameraIndex(image1)];
  const std::vector<Feature>& features0 = featureMap.at(image0);
  const std::vector<Feature>& features1 = featureMap.at(image1);
  cv::Mat_<cv::Vec3w> result = blend(
      loadImage(image0),
      projectImageBetweenCamerasNearest(camera0, camera1, loadImage(image1)));
  for (const auto& match : overlap.matches) {
    Camera::Vector2 p0 = features0[match[0]].position;
    Camera::Vector2 p1 = features1[match[1]].position;
    int trace = features0[match[0]].trace;
    CHECK_EQ(trace, features1[match[1]].trace);
    Camera::Vector3 rig =
        trace < 0 ? triangulate({{camera0, p0}, {camera1, p1}}) : traces[trace].position;
    drawRedGreenLine(
        result,
        p0, // p0 in red
        camera0.pixel(camera1.rigNearInfinity(p1)), // transformed p1 in green
        camera0.pixel(rig)); // via transformed triangulation
  }
  return result;
}

// Debug visualization of per-feature reprojection error; optionally dumps the
// raw error vectors as an EXR into --errors_dir.
cv::Mat_<cv::Vec3w> renderReprojections(
    const ImageId& image,
    const Camera& camera,
    const std::vector<Feature>& features,
    const std::vector<Trace>& traces,
    const Camera::Real scale) {
  cv::Mat_<cv::Vec3w> result = 0.5f * loadImage(image);
  cv::Mat_<cv::Vec3f> errors = cv::Mat::zeros(result.size(), CV_32FC3);
  // removed unused local color constants (drawRedGreenLine supplies its own)
  for (const Feature& feature : features) {
    if (feature.trace >= 0) {
      // draw red line from image feature to reprojected world point
      // then continue in green in the same direction but scale x as far
      Camera::Vector2 proj = camera.pixel(traces[feature.trace].position);
      Camera::Vector2 error = proj - feature.position;
      // OpenCV can only save 1, 3, 4 channels so the third is simply an artifact
      errors(cv::Point(feature.position.x(), feature.position.y())) =
          cv::Vec3f(error.x(), error.y(), 0.0f);
      drawRedGreenLine(result, feature.position, proj + scale * error, proj);
    }
  }
  if (FLAGS_errors_dir != "") {
    filesystem::create_directories(FLAGS_errors_dir);
    std::string errorsFile = folly::sformat("{}/{}.exr", FLAGS_errors_dir, getCameraId(image));
    cv_util::imwriteExceptionOnFail(errorsFile, errors);
  }
  return result;
}

// Summarize the residual norms of a ceres problem (count, RMSE, average,
// percentiles, worst 3) as a single log-friendly string.
std::string getReprojectionReport(
    const ceres::Problem& problem,
    const double* parameter = nullptr) {
  std::vector<double> norms =
      getReprojectionErrorNorms(problem, parameter, FLAGS_weighted_statistics);
  // guard: with no residuals every statistic below would divide by zero
  if (norms.empty()) {
    return "reprojections 0";
  }
  double total = 0;
  double totalSq = 0;
  for (double norm : norms) {
    total += norm;
    totalSq += norm * norm;
  }
  std::ostringstream result;
  result << "reprojections " << norms.size() << " "
         << "RMSE " << sqrt(totalSq / norms.size()) << " "
         << "average " << total / norms.size() << " "
         << "median " << calcPercentile(norms, 0.5) << " "
         << "90% " << calcPercentile(norms, 0.9) << " "
         << "99% " << calcPercentile(norms, 0.99) << " ";
  result << "worst 3: ";
  std::sort(norms.begin(), norms.end());
  // clamp the start index so fewer than 3 residuals doesn't read out of bounds
  for (int i = std::max(0, int(norms.size()) - 3); i < int(norms.size()); ++i) {
    result << norms[i] << " ";
  }
  return result.str();
}

// acos with its argument clamped to [-1, 1] to avoid NaN from rounding.
double acosClamp(double x) {
  return std::acos(std::min(std::max(-1.0, x), 1.0));
}

// RMSE of each calibrated parameter group vs. ground truth, plus the RMSE of
// pairwise forward-axis angles (only angles under one radian are counted).
std::string getCameraRmseReport(
    const std::vector<Camera>& cameras,
    const std::vector<Camera>& groundTruth) {
  Camera::Real position = 0;
  Camera::Real rotation = 0;
  Camera::Real principal = 0;
  Camera::Real distortion = 0;
  Camera::Real focal = 0;
  for (int i = 0; i < int(cameras.size()); ++i) {
    {
      auto before = groundTruth[i].position;
      auto after = cameras[i].position;
      position += (after - before).squaredNorm();
    }
    for (int v = 0; v < 3; ++v) {
      auto before = groundTruth[i].rotation.row(v);
      auto after = cameras[i].rotation.row(v);
      rotation += (after - before).squaredNorm();
    }
    {
      auto before = groundTruth[i].principal;
      auto after = cameras[i].principal;
      principal += (after - before).squaredNorm();
    }
    {
      auto before = groundTruth[i].getDistortion();
      auto after = cameras[i].getDistortion();
      distortion += (after - before).squaredNorm();
    }
    {
      auto before = groundTruth[i].focal;
      auto after = cameras[i].focal;
      focal += (after - before).squaredNorm();
    }
  }
  Camera::Real angle = 0;
  int angleCount = 0;
  for (int i = 0; i < int(cameras.size()); ++i) {
    for (int j = 0; j < i; ++j) {
      for (int v = 2; v < 3; ++v) { // forward axis only
        // angle between camera i and j
        auto before =
            acosClamp(groundTruth[i].rotation.row(v).dot(groundTruth[j].rotation.row(v)));
        if (before > 1) {
          continue; // only count angles less than a radian
        }
        auto after = acosClamp(cameras[i].rotation.row(v).dot(cameras[j].rotation.row(v)));
        angle += (after - before) * (after - before);
        ++angleCount;
      }
    }
  }
  // average
  position /= cameras.size();
  rotation /= 3 * cameras.size();
  principal /= cameras.size();
  distortion /= cameras.size();
  focal /= cameras.size();
  angle /= angleCount;
  std::ostringstream result;
  result << "RMSEs: "
         << "Pos " << sqrt(position) << " "
         << "Rot " << sqrt(rotation) << " "
         << "Principal " << sqrt(principal) << " "
         << "Distortion " << sqrt(distortion) << " "
         << "Focal " << sqrt(focal) << " "
         << "Angle " << sqrt(angle) << " ";
  return result.str();
}

// Map a parameter object to the double* ceres uses as its parameter block.
template <typename T>
double* parameterBlock(T& t) {
  return t.data();
}

template <>
double* parameterBlock(Camera::Real& t) {
  return &t;
}

template <>
double* parameterBlock(Trace& t) {
  return t.position.data();
}

// Set a single parameter block constant (lock) or variable (unlock).
template <typename T>
void lockParameter(ceres::Problem& problem, T& param, const bool lock = true) {
  if (lock) {
    problem.SetParameterBlockConstant(parameterBlock(param));
  } else {
    problem.SetParameterBlockVariable(parameterBlock(param));
  }
}

// Lock/unlock a whole vector of parameter blocks.
template <typename T>
void lockParameters(ceres::Problem& problem, std::vector<T>& params, const bool lock = true) {
  for (T& param : params) {
    lockParameter(problem, param, lock);
  }
}

// Shrink an image in place so it fits within 1200x800 (never enlarges).
void reasonableResize(cv::Mat_<cv::Vec3w>& mat) {
  const double kWidth = 1200;
  const double kHeight = 800;
  double factor = std::min(kWidth / mat.cols, kHeight / mat.rows);
  if (factor < 1) {
    cv::resize(mat, mat, {}, factor, factor, cv::INTER_AREA);
  }
}

// Render every overlap above --debug_matches_overlap; write PNGs into
// --debug_dir if set, otherwise show interactively.
void showMatches(
    const std::vector<Camera>& cameras,
    const FeatureMap& featureMap,
    const std::vector<Overlap>& overlaps,
    const std::vector<Trace>& traces,
    const int pass) {
  // visualization for debugging
  for (const Overlap& overlap : overlaps) {
    const int idx0 = getCameraIndex(overlap.images[0]);
    const int idx1 = getCameraIndex(overlap.images[1]);
    if (cameras[idx0].overlap(cameras[idx1]) > FLAGS_debug_matches_overlap) {
      cv::Mat_<cv::Vec3w> image = renderOverlap(overlap, featureMap, traces, cameras);
      reasonableResize(image);
      if (!FLAGS_debug_dir.empty()) {
        std::string filename = FLAGS_debug_dir + "/" + "pass" + std::to_string(pass) + "_" +
            getCameraId(overlap.images[0]) + "-" + getCameraId(overlap.images[1]) + ".png";
        imwrite(filename, image);
      } else {
        cv::imshow("overlap", image);
        cv::waitKey();
      }
    }
  }
}

// Render reprojection-error visualizations for every image; write PNGs into
// --debug_dir if set, otherwise show interactively.
void showReprojections(
    const std::vector<Camera>& cameras,
    const FeatureMap& featureMap,
    const std::vector<Trace>& traces,
    const Camera::Real scale) {
  for (const auto& entry : featureMap) {
    const ImageId& image = entry.first;
    const std::vector<Feature>& features = entry.second;
    const Camera& camera = cameras[getCameraIndex(image)];
    cv::Mat_<cv::Vec3w> render = renderReprojections(image, camera, features, traces, scale);
    reasonableResize(render);
    if (!FLAGS_debug_dir.empty()) {
      std::string filename = FLAGS_debug_dir + "/" + camera.id + ".png";
      imwrite(filename, render);
    } else {
      cv::imshow("reprojections", render);
      cv::waitKey();
    }
  }
}

// returns true with a probability of numerator / denominator
bool randomSample(int numerator, int denominator) {
  static std::default_random_engine e;
  return numerator > std::uniform_int_distribution<>(0, denominator - 1)(e);
}

// Run the ceres solver on the problem, logging reprojection reports before and
// after; throws std::runtime_error if the solver fails to converge.
void solve(ceres::Problem& problem) {
  ceres::Solver::Options options;
  options.use_inner_iterations = true;
  options.max_num_iterations = 500;
  options.minimizer_progress_to_stdout = false;
  options.num_threads = ThreadPool::getThreadCountFromFlag(FLAGS_ceres_threads);
  if (options.num_threads == 0) {
    options.num_threads = 1; // ceres requires at least one thread
  }
  options.function_tolerance = FLAGS_ceres_function_tolerance;
  ceres::Solver::Summary summary;
  LOG(INFO) << getReprojectionReport(problem);
  int previousFLAGS_v = FLAGS_v; // save FLAGS_v
  if (FLAGS_log_verbose) {
    FLAGS_v = std::max(1, FLAGS_v); // overwrite FLAGS_v
  }
  Solve(options, &problem, &summary);
  FLAGS_v = previousFLAGS_v; // restore FLAGS_v
  LOG(INFO) << summary.BriefReport();
  if (FLAGS_log_verbose) {
    LOG(INFO) << summary.FullReport();
  }
  if (summary.termination_type == ceres::NO_CONVERGENCE) {
    throw std::runtime_error("Failed to converge");
  }
  LOG(INFO) << getReprojectionReport(problem);
}

void validateMatchCount(const std::vector<Camera>& cameras, const std::vector<int>& counts) {
  const double sum = std::accumulate(counts.begin(), counts.end(), 0.0);
  const double mean = sum / counts.size();
  const double sqSum =
std::inner_product(counts.begin(), counts.end(), counts.begin(), 0.0); const double stdev = std::sqrt(sqSum / counts.size() - mean * mean); std::vector<std::string> lowTraceErrors; for (int i = 0; i < int(counts.size()); ++i) { if (FLAGS_log_verbose) { LOG(INFO) << folly::sformat("Camera: {} Traces: {}", cameras[i].id, counts[i]); } const double z = (counts[i] - mean) / stdev; if (-z > FLAGS_outlier_z_threshold || counts[i] < FLAGS_min_traces) { lowTraceErrors.push_back( folly::sformat("Too few matches in camera {}: {}", cameras[i].id, counts[i])); } } if (!lowTraceErrors.empty()) { std::string errorString = boost::algorithm::join(lowTraceErrors, "\n"); throw std::runtime_error(errorString); } } void savePointsFileJson(FeatureMap& featureMap, const std::vector<Trace>& traces) { folly::dynamic arrayOfTraces = folly::dynamic::array(); for (const Trace& trace : traces) { if (trace.references.empty()) { continue; // don't output zombie traces, a different trace has the references now } folly::dynamic arrayOfFeatures = folly::dynamic::array(); for (const auto& ref : trace.references) { const ImageId& image = ref.first; const Feature& feature = featureMap[image][ref.second]; folly::dynamic featureSerialized = folly::dynamic::object("y", feature.position.y())( "x", feature.position.x())("image_id", ref.first); arrayOfFeatures.push_back(featureSerialized); } // clang-format off folly::dynamic traceSerialized = folly::dynamic::object("features", arrayOfFeatures)( "number of references", trace.references.size())("z", trace.position.z())( "y", trace.position.y())("x", trace.position.x()); // clang-format on arrayOfTraces.push_back(traceSerialized); } folly::dynamic points = folly::dynamic::object("points", arrayOfTraces); CHECK(folly::writeFile(folly::toPrettyJson(points), FLAGS_points_file_json.c_str())); } void savePointsFile(FeatureMap& featureMap, const std::vector<Trace>& traces) { std::ofstream file(FLAGS_points_file); for (const Trace& trace : traces) { if 
(trace.references.empty()) { continue; // don't output zombie traces, a different trace has the references now } file << folly::format("{} {} {} ", trace.position.x(), trace.position.y(), trace.position.z()); file << folly::format("1 "); // delimiter file << folly::format("0 0 0"); // RGB value for the point file << "\n"; } } std::vector<int> calculateCameraWeights( const std::vector<Camera>& cameras, const std::vector<Trace>& traces) { std::vector<int> weights(cameras.size(), 1); if (FLAGS_weight_by_trace_count) { for (ssize_t i = 0; i < ssize(cameras); ++i) { int cameraTraces = 0; for (const Trace& trace : traces) { for (const auto& reference : trace.references) { if (cameras[i].id == cameras[getCameraIndex(reference.first)].id) { cameraTraces++; continue; } } } weights[i] = cameraTraces; } } return weights; } bool positionsUnlocked(int pass) { return !FLAGS_lock_positions && pass != 0; } double refine( std::vector<Camera>& cameras, const std::vector<Camera>& groundTruth, FeatureMap featureMap, std::vector<Overlap> overlaps, const int pass) { boost::timer::cpu_timer timer; // remove outlier matches LOG(INFO) << "Removing outlier matches..."; removeOutliersFromCameras(overlaps, featureMap, {}, cameras, FLAGS_outlier_factor); // assemble and remove outlier traces LOG(INFO) << "Assembling traces and removing outlier traces..."; std::vector<Trace> traces = assembleTraces(featureMap, overlaps); triangulateTraces(traces, featureMap, cameras); removeOutliersFromCameras(overlaps, featureMap, traces, cameras, FLAGS_outlier_factor); // final triangulation LOG(INFO) << "Reassembling traces with outliers removed and removing invalid traces..."; traces = assembleTraces(featureMap, overlaps); if (!FLAGS_keep_invalid_traces) { removeInvalidTraces(traces, featureMap); } triangulateTraces(traces, featureMap, cameras); std::vector<int> weights = calculateCameraWeights(cameras, traces); // visualization for debugging showMatches(cameras, featureMap, overlaps, traces, pass); // read 
camera parameters from cameras std::vector<Camera::Vector3> positions; std::vector<Camera::Vector3> rotations; std::vector<Camera::Vector2> principals; std::vector<Camera::Real> focals; std::vector<Camera::Distortion> distortions; for (Camera& camera : cameras) { positions.push_back(camera.position); rotations.push_back(camera.getRotation()); principals.push_back(camera.principal); focals.push_back(camera.getScalarFocal()); distortions.push_back(camera.getDistortion()); } int referenceCameraIdx = -1; int relativeCameraIdx = -1; Camera::Real theta; Camera::Real phi; Camera::Real radius = 0.0; // initialized to silence compiler but this will never get used // If positions are unlocked, define a locked reference camera and lock the baseline between // the reference camera and relative camera if (positionsUnlocked(pass)) { if (FLAGS_reference_camera.empty()) { referenceCameraIdx = 0; } else { CHECK(cameraIdToIndex.count(FLAGS_reference_camera)) << "bad reference_camera: " << FLAGS_reference_camera; referenceCameraIdx = cameraIdToIndex[FLAGS_reference_camera]; } relativeCameraIdx = (referenceCameraIdx + 1) % ssize(cameras); Camera::Vector3 relativePosition = positions[relativeCameraIdx] - positions[referenceCameraIdx]; cartesianToSpherical(radius, theta, phi, relativePosition); } // create the problem: add a residual for each observation ceres::Problem problem; std::vector<int> counts(cameras.size()); for (const Trace& trace : traces) { if (FLAGS_cap_traces && !randomSample(FLAGS_cap_traces, traces.size())) { continue; } for (const auto& ref : trace.references) { const ImageId& image = ref.first; const Feature& feature = featureMap[image][ref.second]; const int camera = getCameraIndex(image); ++counts[camera]; const int group = cameraGroupToIndex[cameras[camera].group]; if (camera == relativeCameraIdx) { SphericalReprojectionFunctor::addResidual( problem, theta, phi, rotations[camera], principals[FLAGS_shared_principal_and_focal ? 
group : camera], focals[FLAGS_shared_principal_and_focal ? group : camera], distortions[FLAGS_shared_distortion ? group : camera], traces[feature.trace].position, radius, cameras[referenceCameraIdx].position, cameras[camera], feature.position, FLAGS_robust, weights[camera]); } else { ReprojectionFunctor::addResidual( problem, positions[camera], rotations[camera], principals[FLAGS_shared_principal_and_focal ? group : camera], focals[FLAGS_shared_principal_and_focal ? group : camera], distortions[FLAGS_shared_distortion ? group : camera], traces[feature.trace].position, cameras[camera], feature.position, FLAGS_robust, weights[camera]); } } } validateMatchCount(cameras, counts); // lock focal and distortion if (pass == 0 || FLAGS_lock_focal) { if (FLAGS_shared_principal_and_focal) { for (const auto& mapping : cameraGroupToIndex) { lockParameter(problem, focals[mapping.second]); } } else { lockParameters(problem, focals); } } if (pass == 0 || FLAGS_lock_distortion) { if (FLAGS_shared_distortion) { for (const auto& mapping : cameraGroupToIndex) { lockParameter(problem, distortions[mapping.second]); } } else { lockParameters(problem, distortions); } } if (FLAGS_lock_principals) { lockParameters(problem, principals); } // lock position LOG(INFO) << folly::sformat("Pass: {}", pass); // If positions are unlocked, only lock the position and rotation of the reference camera if (positionsUnlocked(pass)) { problem.SetParameterBlockConstant(positions[referenceCameraIdx].data()); problem.SetParameterBlockConstant(rotations[referenceCameraIdx].data()); } else { lockParameters(problem, positions); } if (FLAGS_lock_rotations) { lockParameters(problem, rotations); } if (FLAGS_robust) { std::vector<calibration::ReprojectionErrorOutlier> errorsIgnored = getReprojectionErrorOutliers(problem); LOG(INFO) << folly::sformat("Number of down-weighted outliers: {}", errorsIgnored.size()); std::sort(errorsIgnored.begin(), errorsIgnored.end(), math_util::sortdescPair<double, double>); LOG(INFO) 
<< folly::sformat( "Highest 3 (true/weighted): {}/{}, {}/{}, {}/{}", errorsIgnored[2].first, errorsIgnored[2].second, errorsIgnored[1].first, errorsIgnored[1].second, errorsIgnored[0].first, errorsIgnored[0].second); } reportReprojectionErrors(overlaps, featureMap, traces, cameras); solve(problem); if (positionsUnlocked(pass)) { positions[relativeCameraIdx] = sphericalToCartesian(radius, theta, phi); positions[relativeCameraIdx] += positions[referenceCameraIdx]; } std::vector<double> norms = getReprojectionErrorNorms(problem, nullptr, FLAGS_weighted_statistics); double median = calcPercentile(norms, 0.5); if (pass == FLAGS_pass_count - 1 && median > FLAGS_max_error) { LOG(INFO) << folly::sformat("Warning: Final pass median error too high: {}", median); } // write optimized camera parameters back into cameras for (int i = 0; i < int(cameras.size()); ++i) { const int group = cameraGroupToIndex[cameras[i].group]; cameras[i] = makeCamera( cameras[i], positions[i], rotations[i], principals[FLAGS_shared_principal_and_focal ? group : i], focals[FLAGS_shared_principal_and_focal ? group : i], distortions[FLAGS_shared_distortion ? 
group : i]); } reportReprojectionErrors(overlaps, featureMap, traces, cameras); if (FLAGS_points_file != "" && pass == FLAGS_pass_count - 1) { savePointsFile(featureMap, traces); } if (FLAGS_points_file_json != "" && pass == FLAGS_pass_count - 1) { savePointsFileJson(featureMap, traces); } // visualization for debugging if (FLAGS_debug_error_scale && pass == FLAGS_pass_count - 1) { showReprojections(cameras, featureMap, traces, FLAGS_debug_error_scale); } if (FLAGS_enable_timing) { LOG(INFO) << folly::sformat("Pass {} timing :{}", pass, timer.format()); } return median; } double geometricCalibration() { CHECK_NE(FLAGS_rig_in, ""); CHECK_NE(FLAGS_rig_out, ""); if (FLAGS_debug_error_scale || FLAGS_debug_matches_overlap < 1) { CHECK_NE(FLAGS_color, ""); } if (!FLAGS_debug_dir.empty()) { filesystem::create_directories(FLAGS_debug_dir); } const Camera::Rig groundTruth = Camera::loadRig(FLAGS_rig_in); buildCameraIndexMaps(groundTruth); double medianError = 0; if (FLAGS_seed != -1) { std::srand(FLAGS_seed); } for (int experiment = 0; experiment < FLAGS_experiments; ++experiment) { Camera::Rig cameras = groundTruth; Camera::perturbCameras( cameras, FLAGS_perturb_positions, FLAGS_perturb_rotations, FLAGS_perturb_principals, FLAGS_perturb_focals); FeatureMap featureMap; std::vector<Overlap> overlaps; if (!FLAGS_matches.empty()) { folly::dynamic parsed = parseJsonFile(FLAGS_matches); featureMap = loadFeatureMap(parsed); overlaps = loadOverlaps(parsed); } else { generateArtificalPoints(featureMap, overlaps, groundTruth); } LOG(INFO) << getCameraRmseReport(cameras, groundTruth); boost::timer::cpu_timer timer; for (int pass = 0; pass < FLAGS_pass_count; ++pass) { medianError = refine(cameras, groundTruth, featureMap, overlaps, pass); std::cout << "pass " << pass << ": " << getCameraRmseReport(cameras, groundTruth) << std::endl; } if (FLAGS_enable_timing) { LOG(INFO) << folly::sformat("Aggregate timing: {}", timer.format()); } Camera::saveRig(FLAGS_rig_out, cameras); } return 
medianError; }