void BM_dispenso()

in benchmarks/nested_for_benchmark.cpp [103:135]


// Benchmark: runs the outer loop over [0, numElements) through
// dispenso::parallel_for, calling calculateInnerDispenso once per outer index
// and summing the results.
//
// Benchmark arguments:
//   state.range(0) - stored in g_numThreads and used to size the global
//                    dispenso thread pool.
//   state.range(1) - number of elements (numElements).
//
// After the timed loop, the sum from the last iteration is passed to
// checkResults together with the input and the final value of foo.
void BM_dispenso(benchmark::State& state) {
  g_numThreads = state.range(0);
  const int numElements = state.range(1);

  dispenso::resizeGlobalThreadPool(g_numThreads);

  uint64_t sum = 0;
  int foo = 0;

  auto input = getInputs(numElements);
  for (auto UNUSED_VAR : state) {
    // Fresh set of partial sums for every benchmark iteration; parallel_for
    // hands one element of this container to each invocation of the body as
    // lsumStore, initialised by the generator lambda below.
    std::vector<uint64_t> sums;
    sums.reserve(g_numThreads);

    // foo changes every iteration and is forwarded to calculateInnerDispenso.
    ++foo;

    dispenso::parallel_for(
        sums,
        []() { return uint64_t{0}; },
        dispenso::makeChunkedRange(0, numElements, dispenso::ParForChunking::kAuto),
        // input is captured by reference: a by-value capture would copy it
        // every time this closure is built, i.e. once per timed iteration,
        // and that copy would be included in the measurement. The reference
        // stays valid because input outlives the loop and the partial sums
        // are only read after parallel_for has returned.
        [numElements, &input, foo](uint64_t& lsumStore, size_t j, size_t end) {
          // Accumulate locally and write to the shared slot once per chunk.
          uint64_t lsum = 0;
          for (; j != end; ++j) {
            lsum += calculateInnerDispenso(input, foo, numElements);
          }
          lsumStore += lsum;
        });

    // Reduce the partial sums into the total for this iteration.
    sum = 0;
    for (auto s : sums) {
      sum += s;
    }
  }

  checkResults(input, sum, foo, numElements);
}