at::Tensor comp_mat_cpu()

in src/nms/nms_cpu.cpp [29:58]


// Builds the pairwise comparison bit-matrix for a set of boxes.
//
// `bbx` is accessed as a 2-D floating-point tensor with one row per box, and
// `threshold` is the cut-off applied to the value returned by `iou` for a pair.
//
// Returns a Long tensor of shape {N, div_up(N, THREADS_PER_BLOCK)}, where
// N = bbx.size(0). For every pair i < j whose `iou` value is >= threshold,
// bit (j % THREADS_PER_BLOCK) of element [i][j / THREADS_PER_BLOCK] is set.
// Pairs with j <= i are never evaluated, so their bits stay zero.
at::Tensor comp_mat_cpu(const at::Tensor& bbx, float threshold) {
  const int64_t box_count = bbx.size(0);
  const int64_t word_count = div_up(box_count, THREADS_PER_BLOCK);

  // Start from an all-zero matrix; the loops below only ever set bits.
  auto pair_bits =
      at::zeros({box_count, word_count}, bbx.options().dtype(at::ScalarType::Long));

  AT_DISPATCH_FLOATING_TYPES(bbx.scalar_type(), "comp_mat_cpu", ([&] {
    auto boxes = bbx.accessor<scalar_t, 2>();
    auto bit_rows = pair_bits.accessor<int64_t, 2>();

    for (int64_t first = 0; first < box_count; ++first) {
      auto first_box = boxes[first];
      auto out_row = bit_rows[first];

      // Only the strict upper triangle (second > first) is visited.
      for (int64_t second = first + 1; second < box_count; ++second) {
        auto second_box = boxes[second];
        const auto overlap = iou<scalar_t>(first_box, second_box);

        // Written as !(x >= t) rather than x < t so that a NaN overlap is
        // skipped exactly as a failed `>=` test would skip it.
        if (!(overlap >= threshold)) {
          continue;
        }

        // Locate the word holding this column's bit and the bit within it.
        const int64_t word = second / THREADS_PER_BLOCK;
        const int64_t bit = second % THREADS_PER_BLOCK;
        out_row[word] |= int64_t(1) << bit;
      }
    }
  }));

  return pair_bits;
}