in benchmarks/nested_for_benchmark.cpp [82:101]
// Returns the sum of calculate(input, i, foo) over every index i in
// [0, kWorkMultiplier * numElements), with the iterations handed out in chunks
// by dispenso::parallel_for.
//
// Each state slot that parallel_for places in the container receives the
// totals of the chunks processed against it; the slots are added together
// serially once the parallel loop has returned. Because the accumulation is
// unsigned 64-bit addition, the result does not depend on how the chunks are
// distributed across slots.
uint64_t calculateInnerDispenso(uint64_t input, size_t foo, int numElements) {
  // Partial sums, one per state slot. Capacity is reserved for g_numThreads
  // entries up front (presumably one slot per worker thread -- the actual
  // number of slots is decided by parallel_for).
  std::vector<uint64_t> partials;
  partials.reserve(g_numThreads);

  const auto range = dispenso::makeChunkedRange(
      0, kWorkMultiplier * numElements, dispenso::ParForChunking::kAuto);

  dispenso::parallel_for(
      partials,
      []() { return uint64_t{0}; },
      range,
      [input, foo](uint64_t& partial, size_t begin, size_t end) {
        // Accumulate the chunk in a local first so the state slot is written
        // only once per chunk.
        uint64_t chunkTotal = 0;
        while (begin != end) {
          chunkTotal += calculate(input, begin, foo);
          ++begin;
        }
        partial += chunkTotal;
      });

  // Serial reduction of the per-slot partial sums.
  uint64_t total = 0;
  for (size_t idx = 0; idx < partials.size(); ++idx) {
    total += partials[idx];
  }
  return total;
}