in benchmarks/trivial_compute_benchmark.cpp [131:154]
// Benchmark: sums calculate(input, i, foo) over [0, num_elements) using
// tbb::parallel_reduce.
//
// state.range(0) is handed to tbb::task_scheduler_init as the thread count;
// state.range(1) is the number of elements reduced over. After the timed loop
// finishes, the sum produced by the last iteration is passed to checkResults().
void BM_tbb(benchmark::State& state) {
  const int num_threads = state.range(0);
  const int num_elements = state.range(1);
  uint64_t sum = 0;
  int foo = 0;
  auto input = getInputs(num_elements);
  for (auto UNUSED_VAR : state) {
    // The scheduler object is constructed and destroyed on every iteration, so
    // its setup/teardown is part of what this benchmark measures.
    tbb::task_scheduler_init initsched(num_threads);
    // foo is bumped every iteration and fed to calculate(), so each iteration
    // reduces with a different foo value.
    ++foo;
    sum = tbb::parallel_reduce(
        tbb::blocked_range<size_t>(0, num_elements),
        uint64_t{0},
        // `input` is captured by reference: parallel_reduce does not return
        // until the reduction has finished, and `input` outlives this loop, so
        // the reference cannot dangle. Capturing it by value would copy it
        // inside the timed loop on every iteration and skew the measurement.
        [&input, foo](const tbb::blocked_range<size_t>& r, uint64_t init) -> uint64_t {
          // Fold this sub-range into the running partial sum.
          for (size_t a = r.begin(); a != r.end(); ++a) {
            init += calculate(input, a, foo);
          }
          return init;
        },
        // Combine the partial sums of two sub-ranges.
        [](uint64_t x, uint64_t y) -> uint64_t { return x + y; });
  }
  checkResults(input, sum, foo, num_elements);
}