in benchmarks/nested_for_benchmark.cpp [103:135]
// Benchmark body: accumulates calculateInnerDispenso() over the index range
// [0, numElements) using dispenso::parallel_for with per-state partial sums,
// then reduces those partial sums serially and validates the final total.
//
// Benchmark arguments:
//   state.range(0) - thread count; stored in g_numThreads and used to resize
//                    dispenso's global thread pool.
//   state.range(1) - number of elements in the outer (parallel) range.
void BM_dispenso(benchmark::State& state) {
  g_numThreads = state.range(0);
  const int numElements = static_cast<int>(state.range(1));
  dispenso::resizeGlobalThreadPool(g_numThreads);

  uint64_t sum = 0;
  // Incremented once per benchmark iteration and forwarded to the inner
  // computation and to checkResults().
  // NOTE(review): presumably this varies the work between iterations so results
  // cannot be reused across them - confirm against calculateInnerDispenso.
  int foo = 0;

  // Bind by reference instead of copying: the input is only read here, and
  // copying it (and re-copying it into the lambda every iteration) would add
  // allocation/copy cost to the timed region.
  const auto& input = getInputs(numElements);

  for (auto UNUSED_VAR : state) {
    // Per-state partial sums filled in by parallel_for; each one starts at 0
    // via the default-state factory below.
    std::vector<uint64_t> sums;
    sums.reserve(g_numThreads);
    ++foo;

    // `input` is captured by reference. This relies on parallel_for having
    // finished all work before it returns, which the reduction over `sums`
    // directly below already depends on.
    dispenso::parallel_for(
        sums,
        []() { return uint64_t{0}; },
        dispenso::makeChunkedRange(0, numElements, dispenso::ParForChunking::kAuto),
        [numElements, &input, foo](uint64_t& lsumStore, size_t j, size_t end) {
          // Accumulate locally and touch the shared state slot only once per chunk.
          uint64_t lsum = 0;
          for (; j != end; ++j) {
            lsum += calculateInnerDispenso(input, foo, numElements);
          }
          lsumStore += lsum;
        });

    // Serial reduction of the partial sums; only the last iteration's total
    // survives the loop and is checked below.
    sum = 0;
    for (const uint64_t partial : sums) {
      sum += partial;
    }
  }

  checkResults(input, sum, foo, numElements);
}