in src/nms/nms_cpu.cpp [29:58]
// Builds a bit-packed pairwise comparison matrix for the rows of `bbx`.
//
// The result is an int64 tensor of shape {num, div_up(num, THREADS_PER_BLOCK)},
// created with the same options as `bbx` apart from the dtype. For every
// ordered pair (i, j) with i < j whose iou(bbx[i], bbx[j]) is >= `threshold`,
// bit (j % THREADS_PER_BLOCK) of word (j / THREADS_PER_BLOCK) in row i is set.
// All remaining bits stay zero, so only the strict upper triangle is populated.
//
// `bbx` is read through a rank-2 accessor of a floating-point scalar type.
// NOTE(review): presumably each row holds one bounding box and rows are already
// ordered by score -- confirm against the caller.
at::Tensor comp_mat_cpu(const at::Tensor& bbx, float threshold) {
  const int64_t num_boxes = bbx.size(0);
  const int64_t words_per_row = div_up(num_boxes, THREADS_PER_BLOCK);

  auto comp_mat = at::zeros(
      {num_boxes, words_per_row},
      bbx.options().dtype(at::ScalarType::Long));

  AT_DISPATCH_FLOATING_TYPES(bbx.scalar_type(), "comp_mat_cpu", ([&] {
    auto boxes = bbx.accessor<scalar_t, 2>();
    auto mask = comp_mat.accessor<int64_t, 2>();

    for (int64_t row = 0; row < num_boxes; ++row) {
      auto ref_box = boxes[row];
      auto mask_row = mask[row];

      // Only later rows are compared against the current one.
      for (int64_t col = row + 1; col < num_boxes; ++col) {
        auto other_box = boxes[col];
        auto overlap = iou<scalar_t>(ref_box, other_box);

        // Written as a negated >= so that a NaN overlap leaves the bit unset.
        if (!(overlap >= threshold)) {
          continue;
        }

        const int64_t word = col / THREADS_PER_BLOCK;
        const int64_t bit = col % THREADS_PER_BLOCK;
        mask_row[word] |= int64_t(1) << bit;
      }
    }
  }));

  return comp_mat;
}