in benchmarks/nested_for_benchmark.cpp [183:206]
/// Benchmark body that runs the nested-for workload through
/// tbb::parallel_reduce.
///
/// @param state Benchmark state. state.range(0) is stored in g_numThreads and
///              passed to the TBB scheduler initializer; state.range(1) is the
///              element count handed to getInputs() and used as the upper
///              bound of the reduced index range.
///
/// Each timed iteration increments `foo`, reduces over [0, numElements), and
/// overwrites `sum`. After the loop, the last `sum` and final `foo` are passed
/// to checkResults().
void BM_tbb(benchmark::State& state) {
  g_numThreads = state.range(0);
  const int numElements = state.range(1);
  uint64_t sum = 0;
  int foo = 0;
  auto input = getInputs(numElements);
  // Const alias captured by reference below. Capturing `input` by value would
  // copy it into a new closure on every timed iteration (a deep copy if
  // getInputs() yields a container -- TODO confirm), charging that copy to the
  // measured time. The lambda is only used synchronously inside
  // parallel_reduce, so the reference cannot outlive `input`.
  const auto& inputView = input;
  for (auto UNUSED_VAR : state) {
    tbb::task_scheduler_init initsched(g_numThreads);
    // foo changes every iteration, so it is captured by value at its current
    // value when the closure is built.
    ++foo;
    sum = tbb::parallel_reduce(
        tbb::blocked_range<size_t>(0, numElements),
        uint64_t{0},
        [numElements, &inputView, foo](
            const tbb::blocked_range<size_t>& r, uint64_t init) -> uint64_t {
          // NOTE(review): the index `a` is not passed to calculateInnerTbb, so
          // every index in the sub-range adds the same call result -- confirm
          // this is intended.
          for (size_t a = r.begin(); a != r.end(); ++a)
            init += calculateInnerTbb(inputView, foo, numElements);
          return init;
        },
        // Join step: combine partial sums from two sub-ranges.
        [](uint64_t x, uint64_t y) -> uint64_t { return x + y; });
  }
  checkResults(input, sum, foo, numElements);
}