std::unique_ptr<DepthVideoTrackTable> DepthVideoProcessor::computeTracks()

in lib/Processor.cpp [646:886]


std::unique_ptr<DepthVideoTrackTable> DepthVideoProcessor::computeTracks(
    const Params& params) {
  const ColorStream& cs = video_->colorStream("down");
  const int w = cs.width();
  const int h = cs.height();

  const ColorStream* dynamicMaskStream = nullptr;
  Vector2f dynamicMaskScale(1.f, 1.f);
  if (video_->hasColorStream("dynamic_mask")) {
    dynamicMaskStream = &video_->colorStream("dynamic_mask");
    dynamicMaskScale = Vector2f(
        dynamicMaskStream->width() / float(w),
        dynamicMaskStream->height() / float(h));
  }

  struct Pixel {
    float cornerStrength;
    Vector2fna pos;
    Pixel(const float c, const Vector2fna& pos) : cornerStrength(c), pos(pos) {
    }
    bool operator<(const Pixel& other) const {
      return cornerStrength > other.cornerStrength;
    }
  };

  auto loadFlow = [&](const int frame) -> std::unique_ptr<Mat2f> {
    std::string flowFile = fmt::format(
        "{:s}/flow/flow_{:06d}_{:06d}.raw",
        video_->path(), frame, frame + 1);
    if (!fs::exists(flowFile)) {
      return nullptr;
    }
    std::unique_ptr<Mat2f> flow = std::make_unique<Mat2f>();
    freadim(flowFile, *flow);
    if (flow->cols != w || flow->rows != h) {
      return nullptr;
    }

    return flow;
  };

  auto loadFlowMask = [&](const int frame) -> std::unique_ptr<Mat1b> {
    std::string maskFile = fmt::format(
         "{:s}/flow_mask/mask_{:06d}_{:06d}.png",
         video_->path(), frame, frame + 1);
    if (!fs::exists(maskFile)) {
      return nullptr;
    }
    std::unique_ptr<Mat1b> mask = std::make_unique<Mat1b>();
    *mask = imread(maskFile, IMREAD_GRAYSCALE);
    if (mask->cols != w || mask->rows != h) {
      return nullptr;
    }

    return mask;
  };

  auto createDiskKernel = [&](const int radius) {
    const int size = 2 * radius + 1;
    Mat1b kernel(size, size);
    for (int y = 0; y < size; ++y) {
      uint8_t* ptr = kernel.ptr<uint8_t>(y);
      for (int x = 0; x < size; ++x) {
        const int rx = x - radius;
        const int ry = y - radius;
        ptr[x] = (sqr(rx) + sqr(ry) <= sqr(radius) ? 255 : 0);
      }
    }
    return kernel;
  };

  auto splatKernel = [&](
      Mat1b& mask, const Mat1b& kernel, const int x, const int y) {
    const int radius = kernel.cols / 2;
    const int mx0 = std::max(0, x - radius);
    const int mx1 = std::min(w - 1, x + radius);
    const int my0 = std::max(0, y - radius);
    const int my1 = std::min(h - 1, y + radius);
    for (int my = my0; my <= my1; ++my) {
      const int dy = my - (y - radius);
      const uint8_t* kernelPtr = kernel.ptr<const uint8_t>(dy);
      uint8_t* maskPtr = mask.ptr<uint8_t>(my);
      for (int mx = mx0; mx <= mx1; ++mx) {
        const int dx = mx - (x - radius);
        if (kernelPtr[dx]) {
          maskPtr[mx] = 255;
        }
      }
    }
  };

  std::vector<Pixel> pixels;
  pixels.reserve(w * h);

  Mat1b spawnKernel = createDiskKernel(params.trackSpawnDistance);
  Mat1b pruneKernel = createDiskKernel(params.trackPruneDistance);

  std::unique_ptr<DepthVideoTrackTable> tracks =
      std::make_unique<DepthVideoTrackTable>();

  std::unique_ptr<Mat2f> flow;
  std::unique_ptr<Mat1b> flowMask;

  for (int frame = 0; frame < video_->numFrames(); ++frame) {
    tracks->addFrame();

    if (!params.frameRange.inRange(frame)) {
      LOG(INFO) <<
          "Skipping frame << " << frame << " (not in specified range).";
      continue;
    }

    LOG(INFO) << "Tracking frame " << frame << "...";

    const Mat* color = cs.frame(frame).image();
    if (!color) {
      continue;
    }

    Mat1f dynamicDistance(h, w);

    if (dynamicMaskStream) {
      const Mat1b* dynamicMask = dynamicMaskStream->frame(frame).image1b();

      const int w = dynamicMask->cols;
      const int h = dynamicMask->rows;

      Mat1b binarized(h, w);
      for (int y = 0; y < h; ++y) {
        const uint8_t* srcPtr = dynamicMask->ptr<const uint8_t>(y);
        uint8_t* dstPtr = binarized.ptr<uint8_t>(y);
        for (int x = 0; x < w; ++x) {
          dstPtr[x] = (srcPtr[x] < 127 ? 0 : 255);
        }
      }

      distanceTransform(binarized, dynamicDistance, DIST_L2, DIST_MASK_5);
    } else {
      dynamicDistance = FLT_MAX;
    }

    Mat1b spawnMask(h, w, uint8_t(0));
    Mat1b pruneMask(h, w, uint8_t(0));

    // First, continue tracks from previous frame.
    if (frame > params.frameRange.firstFrame()) {
      std::unique_ptr<Mat2f> flow = loadFlow(frame - 1);
      std::unique_ptr<Mat1b> flowMask = loadFlowMask(frame - 1);

      if (flow && flowMask) {
        for (int trackId : tracks->frame(frame - 1).tracks) {
          auto& t = tracks->track(trackId);
          auto& o0 = t.obs(frame - 1);

          const float fx0 = o0.loc(0) * w;
          const float fy0 = o0.loc(1) / video_->invAspect() * h;
          const int ix0 = std::min(int(fx0 + 0.5f), w - 1);
          const int iy0 = std::min(int(fy0 + 0.5f), h - 1);

          if (!flowMask->at<uint8_t>(iy0, ix0)) {
            continue;
          }

          const Vec2f& f = flow->at<Vec2f>(iy0, ix0);
          const float fx1 = fx0 + f(0);
          const float fy1 = fy0 + f(1);
          const int ix1 = fx1 + 0.5f;
          const int iy1 = fy1 + 0.5f;
          if (ix1 >= 0 && ix1 < w && iy1 >= 0 && iy1 < h) {
            const int ix1s = fx1 * dynamicMaskScale.x();
            const int iy1s = fy1 * dynamicMaskScale.y();
            if (!pruneMask(iy1, ix1) &&
                dynamicDistance(iy1s, ix1s) >= params.minDynamicDistance) {
              Obs o1(fx1 / w, fy1 / h * video_->invAspect());
              tracks->addObs(trackId, frame, o1);
              splatKernel(pruneMask, pruneKernel, ix1, iy1);
              splatKernel(spawnMask, spawnKernel, ix1, iy1);
            }
          }
        }
      }
    }

    // Then, spawn some new tracks if there are large gaps.
    if (frame < params.frameRange.lastFrame()) {
      std::unique_ptr<Mat1b> flowMask = loadFlowMask(frame - 1);

      Mat1f gray;
      cvtColor(*color, gray, COLOR_BGR2GRAY);

      const int blockSize = 3;
      Mat1f cornerResponse;
      cornerMinEigenVal(gray, cornerResponse, blockSize);

      pixels.clear();
      for (int y = 0; y < h; ++y) {
        const float* const cornerPtr = cornerResponse.ptr<const float>(y);
        const uint8_t* const flowMaskPtr =
            (flowMask ? flowMask->ptr<const uint8_t>(y) : nullptr);
        const float* const dynamicDistancePtr =
            dynamicDistance.ptr<const float>(y * dynamicMaskScale.y());
        for (int x = 0; x < w; ++x) {
          const float dd = dynamicDistancePtr[int(x * dynamicMaskScale.x())];
          if ((!flowMaskPtr || flowMaskPtr[x]) &&
              dd > params.minDynamicDistance) {
            pixels.emplace_back(cornerPtr[x],
                Vector2fna(x / float(w), y / float(h) * video_->invAspect()));
          }
        }
      }

      // Sort pixels so that the most corner-like ones come first.
      std::sort(pixels.begin(), pixels.end());

      for (const Pixel& p : pixels) {
        const int x = p.pos.x() * w;
        const int y = p.pos.y() / float(video_->invAspect()) * h;

        if (spawnMask(y, x)) {
          continue;
        }

        tracks->createTrack(frame, Obs(p.pos.x(), p.pos.y()));

        splatKernel(spawnMask, spawnKernel, x, y);
      }
    }
  }

  // Prune short tracks
  for (int trackId = 0; trackId < tracks->numTracks(); ++trackId) {
    if (tracks->hasTrack(trackId)) {
      const Track& t = tracks->track(trackId);
      if (t.length() < params.minTrackLength) {
        tracks->deleteTrack(trackId);
      }
    }
  }

  return tracks;
}