in source/depth_estimation/Derp.cpp [103:225]
// Computes the photometric matching cost of a single dst pixel (x, y) at a
// candidate disparity, by comparing a patch around (x, y) in dst against the
// corresponding reprojected patch from every src camera in the pyramid level.
//
// Returns {costFinal, confidence}:
//   - costFinal: variance-normalized mean of the kept per-camera SSDs; FLT_MAX
//     when fewer than (kMinOverlappingCams - 1) src cameras see the point.
//   - confidence: dst color variance at (x, y) clamped to at least kMinVar,
//     or 0 when the point is skipped.
std::tuple<float, float> computeCost(
    const PyramidLevel<PixelType>& pyramidLevel,
    const int dstIdx,
    const float disparity,
    const int x,
    const int y) {
  // For a given (x, y, depth) in dst, we find the corresponding (x, y) in src,
  // and then its reprojection into dst where src and dst are aligned up to
  // translation. There we can extract a square patch from src and projected dst
  // to compute the cost.
  // For a given dst, we do this for every src
  // Note that src and dst align up to translation when projected to infinity.
  // Also note that when looking out to the world from the center of dst the
  // color does not change with depth, so dst does not need to be transformed
  //
  // (1) pDst -> (2) pWorld -> (3) pSrc -> (4) pInf -> (5) pDstSrc
  //
  //                        (4)
  //          ...                        ...
  //           |                          |
  //           |                          |
  //           |                          |
  //           |                          |
  //           |                          |
  //           |                          |
  //           |  (2)                     |
  //           |  /                    |_ |
  //  ______|______/_                |_   ___|___________
  // |      |     /  |            ||__   |   |           |
  // |      |  (1)   |            ||     |   |           |
  // |     (5)       |                   |  (3)          |
  // |               |                   |               |
  // |               |                   |               |
  // |_______________|                   |_______________|
  //        dst                                 src
  // (1) pDst = (x, y)
  // (2) get pWorld
  const cv::Mat_<PixelType>& dstColor = pyramidLevel.dstProjColor(dstIdx);
  const Camera& camDst = pyramidLevel.rigDst[dstIdx];
  const Camera::Vector3 pWorld =
      dstToWorldPoint(camDst, x, y, disparity, dstColor.cols, dstColor.rows);
  // Compute SSD between dst and projected src for each src.
  // Each entry is (biased SSD, unbiased SSD): the first is used only to rank
  // cameras (see the nth_element below); the second is what gets summed into
  // the returned cost.
  using SSDPair = std::pair<float, float>;
  // NOTE(review): STACK_ARRAY presumably provides stack-allocated scratch
  // storage of rigSrc.size() entries — capacity bound, not element count.
  STACK_ARRAY(SSDPair, SSDs, pyramidLevel.rigSrc.size());
  int ssdCount = 0; // number of valid entries filled into SSDs
  const cv::Mat_<PixelType>& dstColorBias = pyramidLevel.dstProjColorBias(dstIdx);
  for (ssize_t srcIdx = 0; srcIdx < ssize(pyramidLevel.rigSrc); ++srcIdx) {
    // No SSD if src = dst: a camera trivially matches itself and would bias
    // the cost toward zero.
    if (srcIdx == pyramidLevel.dst2srcIdxs[dstIdx]) {
      continue;
    }
    // (3) get pSrc: project the world point into this src camera; skip this
    // camera entirely if the point falls outside its image.
    const Camera& camSrc = pyramidLevel.rigSrc[srcIdx];
    const cv::Size& srcSize = pyramidLevel.srcColor(srcIdx).size();
    Camera::Vector2 pSrc;
    if (!worldToSrcPoint(pSrc, pWorld, camSrc, srcSize.width, srcSize.height)) {
      continue;
    }
    // Exclude a half-texel band to simulate proper clamp-to-border semantics.
    // Currently disabled (constant false); kept as a documented toggle.
    const bool kExcludeHalfTexel = false;
    if (kExcludeHalfTexel) {
      if (pSrc.x() < 0.5 || srcSize.width - 0.5 < pSrc.x() || pSrc.y() < 0.5 ||
          srcSize.height - 0.5 < pSrc.y()) {
        continue;
      }
    }
    // (3) -> (4) -> (5) mapping from pre-computed projection warp
    const cv::Mat_<cv::Vec2f>& dstProjWarp = pyramidLevel.dstProjWarp(dstIdx, srcIdx);
    const cv::Vec2f pDstSrc = cv_util::getPixelBilinear(dstProjWarp, pSrc.x(), pSrc.y());
    // Check if pDstSrc is within bounds. NaN in the warp marks pixels with no
    // valid mapping, so a NaN lookup means this camera can't contribute.
    const float xDstSrc = pDstSrc[0] + 0.5; // pDstSrc uses opencv coordinate convention
    const float yDstSrc = pDstSrc[1] + 0.5; // +0.5 shifts to the sampling convention getPixelBilinear expects
    if (std::isnan(xDstSrc) || std::isnan(yDstSrc)) {
      continue;
    }
    // NOTE: bias of src projected into dst (srcBias) is the average of its
    // patch values, which are bilinearly interpolated from floating point
    // projected coordinates (pDstSrc). This is mathematically different than
    // bilinearly interpolating the pre-computed projected bias around pDstSrc,
    // because we are grabbing biases from neighboring footprints, but it seems
    // to produce very similar results
    const cv::Mat_<PixelType>& dstSrcColorBias = pyramidLevel.dstProjColorBias(dstIdx, srcIdx);
    const PixelType dstSrcBias = cv_util::getPixelBilinear(dstSrcColorBias, xDstSrc, yDstSrc);
    const PixelType& dstBias = dstColorBias(y, x);
    const cv::Mat_<PixelType>& dstSrcColor = pyramidLevel.dstProjColor(dstIdx, srcIdx);
    std::pair<float, float> ssd = computeSSD(
        dstColor, x, y, dstBias, dstSrcColor, xDstSrc, yDstSrc, dstSrcBias, kSearchWindowRadius);
    SSDs[ssdCount] = ssd;
    ++ssdCount;
  }
  // Require at least (kMinOverlappingCams - 1) contributing src cameras
  // (the dst camera itself is the implicit extra one).
  int keep = kMinOverlappingCams - 1;
  if (ssdCount < keep) {
    return std::make_tuple(FLT_MAX, 0.0f); // not enough cameras see this disparity, skip
  }
  // Add up unbiased SSDs for all but the two patches with the worst biased SSDs
  // (never dropping below the minimum-camera floor). The guard above ensures
  // keep <= ssdCount here, so nth_element's middle iterator is always valid.
  keep = std::max<int>(keep, ssdCount - 2);
  // Partition so the `keep` entries with the smallest biased SSD (pair.first,
  // via lexicographic pair comparison) come first; full sorting is unnecessary.
  std::nth_element(SSDs, SSDs + keep, SSDs + ssdCount);
  float cost = 0;
  for (int i = 0; i < keep; ++i) {
    cost += SSDs[i].second; // sum the unbiased SSDs of the kept cameras
  }
  cost /= keep;
  // Trust costs when more cameras are involved
  // This also means that we penalize closeup proposals (closer to camera rig
  // means fewer cameras see that point)
  const float trustCoef = 1.0f / keep;
  // Normalize by the dst patch variance (clamped to kMinVar so flat regions
  // don't blow the cost up); the clamped variance doubles as the confidence.
  const float dstVariance = pyramidLevel.dstVariance(dstIdx)(y, x);
  const float confidence = std::max(dstVariance, kMinVar);
  const float costFinal = cost * trustCoef / confidence;
  return std::make_tuple(costFinal, confidence);
}