in benchmarks/summing_for_benchmark.cpp [72:107]
// Benchmark: parallel sum-reduction over the shared input using
// dispenso::parallel_for with one accumulator slot per state.
//   state.range(0) -> number of threads given to the dispenso::ThreadPool
//   state.range(1) -> number of input elements to process
// The total from the final timed iteration is handed to checkResults().
void BM_dispenso(benchmark::State& state) {
  const int num_threads = static_cast<int>(state.range(0));
  const int num_elements = static_cast<int>(state.range(1));

  dispenso::ThreadPool pool(num_threads);

  auto& input = getInputs(num_elements);

  // Generator used by parallel_for to create each accumulator slot.
  const auto makeZero = []() { return int64_t{0}; };

  int64_t total = 0;
  // Bumped once per timed iteration and folded into the summed expression, so
  // every iteration evaluates a different formula.
  // NOTE(review): presumably this keeps the compiler from reusing a previous
  // iteration's result -- confirm against the other benchmarks in this file.
  int pass = 0;

  for (auto UNUSED_VAR : state) {
    dispenso::TaskSet tasks(pool);

    // Fresh accumulator slots for this iteration; filled in by parallel_for.
    std::vector<int64_t> partials;
    partials.reserve(num_threads);

    ++pass;

    dispenso::parallel_for(
        tasks,
        partials,
        makeZero,
        dispenso::makeChunkedRange(0, num_elements, dispenso::ParForChunking::kAuto),
        // `pass` is captured by value: each iteration gets its current value.
        [&input, pass](int64_t& slot, size_t i, size_t end) {
          // Accumulate the chunk in a local and touch the slot only once.
          int64_t chunkSum = 0;
          for (size_t idx = i; idx != end; ++idx) {
            chunkSum += input[idx] * input[idx] - 3 * pass * input[idx];
          }
          slot += chunkSum;
        });

    // Combine the per-state partial sums into this iteration's total.
    total = 0;
    for (const auto partial : partials) {
      total += partial;
    }
  }

  checkResults(input, total, pass);
}