benchmarks/simple_pool_benchmark.cpp (211 lines of code) (raw):
// Copyright (c) Facebook, Inc. and its affiliates.
//
// This source code is licensed under the MIT license found in the
// LICENSE.md file in the root directory of this source tree.
#include <chrono>
#include <cmath>
#include <dispenso/task_set.h>
#if !defined(BENCHMARK_WITHOUT_TBB)
#include "tbb/task_group.h"
#include "tbb/task_scheduler_init.h"
#endif // !BENCHMARK_WITHOUT_TBB
#include "thread_benchmark_common.h"
using namespace std::chrono_literals;
// Task counts swept by CustomArguments(); each benchmark reads the chosen value through
// state.range(1).
static constexpr int kSmallSize = 1'000;
static constexpr int kMediumSize = 10'000;
static constexpr int kLargeSize = 1'000'000;
// Accumulator slot that benchmark tasks add into. The 64-byte alignment rounds sizeof(Work) up to
// 64, so neighbouring elements of a Work array never overlap within a 64-byte span (presumably
// chosen to match the cache-line size and avoid false sharing between threads).
struct alignas(64) Work {
  // Zero by default so that instances with automatic or dynamic storage start from a defined value
  // too, not only zero-initialized globals.
  size_t count = 0;

  // Adds o to the running count. Returns *this, as is conventional for compound assignment, so the
  // result can be chained or inspected; callers that ignore the result are unaffected.
  Work& operator+=(size_t o) {
    count += o;
    return *this;
  }
};
// Accumulator slots shared by all benchmarks. Tasks pick a slot with `tid() & 1023`, which is why
// the array holds exactly 1024 entries.
Work g_work[1024];

// Next id that tid() will hand out; ids start at zero.
std::atomic<int> g_tCounter(0);
inline int tid() {
static DISPENSO_THREAD_LOCAL int t = -1;
if (t < 0) {
t = g_tCounter++;
}
return t;
}
// Benchmark: every iteration schedules num_elements tiny tasks on a dispenso TaskSet. The backing
// ThreadPool is built once, outside the timed loop.
//   state.range(0): thread count passed to the ThreadPool constructor.
//   state.range(1): number of tasks scheduled per iteration.
void BM_dispenso(benchmark::State& state) {
  const auto num_threads = static_cast<int>(state.range(0));
  const auto num_elements = static_cast<int>(state.range(1));

  dispenso::ThreadPool pool(num_threads);
  Work* slots = g_work;

  for (auto UNUSED_VAR : state) {
    // There is no explicit wait; presumably the TaskSet destructor blocks until its tasks have
    // finished -- confirm against the dispenso documentation.
    dispenso::TaskSet batch(pool);
    for (int task = 0; task < num_elements; ++task) {
      // Each task adds its own index into the slot selected by the executing thread's id.
      batch.schedule([task, slots]() { slots[tid() & 1023] += task; });
    }
  }
}
#if !defined(BENCHMARK_WITHOUT_TBB)
// Benchmark: every iteration runs num_elements tiny tasks on a tbb::task_group and waits for them.
// The scheduler is initialized once, outside the timed loop.
//   state.range(0): thread count passed to tbb::task_scheduler_init.
//   state.range(1): number of tasks run per iteration.
void BM_tbb(benchmark::State& state) {
  const auto num_threads = static_cast<int>(state.range(0));
  const auto num_elements = static_cast<int>(state.range(1));

  tbb::task_scheduler_init initsched(num_threads);
  Work* slots = g_work;

  for (auto UNUSED_VAR : state) {
    tbb::task_group group;
    for (int task = 0; task < num_elements; ++task) {
      // Each task adds its own index into the slot selected by the executing thread's id.
      group.run([task, slots]() { slots[tid() & 1023] += task; });
    }
    group.wait();
  }
}
// Benchmark: nested task groups on TBB. Every iteration initializes the scheduler, then runs
// num_elements outer tasks; each outer task runs sqrt(num_elements) (truncated) inner tasks in its
// own task_group and waits for them.
//   state.range(0): thread count passed to tbb::task_scheduler_init.
//   state.range(1): number of outer tasks per iteration.
void BM_tbb2(benchmark::State& state) {
  const auto num_threads = static_cast<int>(state.range(0));
  const auto num_elements = static_cast<int>(state.range(1));

  for (auto UNUSED_VAR : state) {
    // Scheduler setup happens inside the loop, so it is part of what is being timed.
    tbb::task_scheduler_init initsched(num_threads);
    tbb::task_group outer;
    for (int i = 0; i < num_elements; ++i) {
      outer.run([num_elements]() {
        const int inner_count = static_cast<int>(std::sqrt(num_elements));
        Work* slots = g_work;
        tbb::task_group inner;
        for (int k = 0; k < inner_count; ++k) {
          inner.run([k, slots]() { slots[tid() & 1023] += k; });
        }
        inner.wait();
      });
    }
    outer.wait();
  }
}
// Benchmark: a serial chain of tasks on TBB. Each task does one tiny update and then submits at
// most one successor, so the functor body runs num_elements + 1 times per iteration, one link at a
// time.
//   state.range(0): thread count passed to tbb::task_scheduler_init.
//   state.range(1): number of successor tasks in the chain.
// The timed loop is bracketed by startRusage()/endRusage(state).
// NOTE(review): presumably these record process resource usage into the benchmark state --
// confirm in thread_benchmark_common.h.
void BM_tbb_mostly_idle(benchmark::State& state) {
  const auto num_threads = static_cast<int>(state.range(0));
  const auto num_elements = static_cast<int>(state.range(1));

  // Self-resubmitting functor. The call operator is const, so the fields it modifies or hands out
  // as non-const are declared mutable.
  struct Recurse {
    void operator()() const {
      work[tid() & 1023] += i;
      if (i >= num_elements) {
        return; // End of the chain.
      }
      ++i;
      // A copy of this functor, carrying the incremented counter, becomes the next link.
      g->run(*this);
    }
    mutable int i;
    mutable Work* work;
    mutable tbb::task_group* g;
    int num_elements;
  };

  startRusage();
  for (auto UNUSED_VAR : state) {
    tbb::task_scheduler_init initsched(num_threads);
    tbb::task_group group;
    // Field order: i, work, g, num_elements.
    Recurse link{0, g_work, &group, num_elements};
    // The first link runs directly on the calling thread.
    link();
    group.wait();
  }
  endRusage(state);
}
// Benchmark: two empty tasks on TBB separated by a 100 ms sleep on the calling thread. The
// scheduler is initialized inside the timed loop.
//   state.range(0): thread count passed to tbb::task_scheduler_init.
// The timed loop is bracketed by startRusage()/endRusage(state).
// NOTE(review): presumably these record process resource usage into the benchmark state --
// confirm in thread_benchmark_common.h.
void BM_tbb_very_idle(benchmark::State& state) {
  const auto num_threads = static_cast<int>(state.range(0));
  const std::chrono::milliseconds idle_gap(100);

  startRusage();
  for (auto UNUSED_VAR : state) {
    tbb::task_scheduler_init initsched(num_threads);
    tbb::task_group group;
    group.run([]() {});
    std::this_thread::sleep_for(idle_gap);
    group.run([]() {});
    group.wait();
  }
  endRusage(state);
}
#endif // !BENCHMARK_WITHOUT_TBB
// Benchmark: nested scheduling on dispenso. Every iteration builds a ThreadPool and schedules
// num_elements outer tasks on it; each outer task creates its own TaskSet on the same pool and
// schedules sqrt(num_elements) (truncated) inner tasks.
//   state.range(0): thread count passed to the ThreadPool constructor.
//   state.range(1): number of outer tasks per iteration.
// There are no explicit waits; presumably the TaskSet and ThreadPool destructors block until
// outstanding work is complete -- confirm against the dispenso documentation.
void BM_dispenso2(benchmark::State& state) {
  const auto num_threads = static_cast<int>(state.range(0));
  const auto num_elements = static_cast<int>(state.range(1));

  for (auto UNUSED_VAR : state) {
    // Pool construction and teardown happen inside the loop, so they are part of what is timed.
    dispenso::ThreadPool pool(num_threads);
    for (int i = 0; i < num_elements; ++i) {
      pool.schedule([&pool, num_elements]() {
        const int inner_count = static_cast<int>(std::sqrt(num_elements));
        Work* slots = g_work;
        dispenso::TaskSet inner(pool);
        for (int k = 0; k < inner_count; ++k) {
          inner.schedule([k, slots]() { slots[tid() & 1023] += k; });
        }
      });
    }
  }
}
// Benchmark: a serial chain of tasks on a dispenso ThreadPool. Each task does one tiny update and
// then schedules at most one successor, so the functor body runs num_elements + 1 times per
// iteration, one link at a time.
//   state.range(0): thread count passed to the ThreadPool constructor.
//   state.range(1): number of successor tasks in the chain.
// The timed loop is bracketed by startRusage()/endRusage(state).
// NOTE(review): presumably these record process resource usage into the benchmark state --
// confirm in thread_benchmark_common.h.
void BM_dispenso_mostly_idle(benchmark::State& state) {
  const auto num_threads = static_cast<int>(state.range(0));
  const auto num_elements = static_cast<int>(state.range(1));

  // Self-rescheduling functor.
  struct Recurse {
    void operator()() {
      work[tid() & 1023] += i;
      if (i >= num_elements) {
        return; // End of the chain.
      }
      ++i;
      // Hand the pool this functor with the incremented counter as the next link.
      pool->schedule(*this);
    }
    int i;
    Work* work;
    dispenso::ThreadPool* pool;
    int num_elements;
  };

  startRusage();
  for (auto UNUSED_VAR : state) {
    // There is no explicit wait; presumably the ThreadPool destructor blocks until the chain has
    // finished -- confirm against the dispenso documentation.
    dispenso::ThreadPool pool(num_threads);
    // Field order: i, work, pool, num_elements.
    Recurse link{0, g_work, &pool, num_elements};
    // The first link runs directly on the calling thread.
    link();
  }
  endRusage(state);
}
// Benchmark: two empty tasks on a dispenso ThreadPool separated by a 100 ms sleep on the calling
// thread. The pool is constructed and destroyed inside the timed loop.
//   state.range(0): thread count passed to the ThreadPool constructor.
// The timed loop is bracketed by startRusage()/endRusage(state).
// NOTE(review): presumably these record process resource usage into the benchmark state --
// confirm in thread_benchmark_common.h.
void BM_dispenso_very_idle(benchmark::State& state) {
  const auto num_threads = static_cast<int>(state.range(0));
  const std::chrono::milliseconds idle_gap(100);

  startRusage();
  for (auto UNUSED_VAR : state) {
    dispenso::ThreadPool pool(num_threads);
    pool.schedule([]() {});
    std::this_thread::sleep_for(idle_gap);
    pool.schedule([]() {});
  }
  endRusage(state);
}
// Registers one (thread count, task count) argument pair for every combination of the three size
// constants and the values produced by pow2HalfStepThreads(). Task count is the outer loop, so all
// thread counts for the smallest size are registered first.
static void CustomArguments(benchmark::internal::Benchmark* b) {
  const int task_counts[] = {kSmallSize, kMediumSize, kLargeSize};
  for (const int num_elements : task_counts) {
    for (const int num_threads : pow2HalfStepThreads()) {
      // Argument 0 is the thread count, argument 1 the task count.
      b->Args({num_threads, num_elements});
    }
  }
}
// Registers a single-argument run (the thread count) for each value produced by
// pow2HalfStepThreads(). Used by the "very idle" benchmarks, which take no task count.
static void CustomArgumentsVeryIdle(benchmark::internal::Benchmark* b) {
  for (const int num_threads : pow2HalfStepThreads()) {
    b->Args({num_threads});
  }
}
// Benchmark registrations. Each TBB benchmark is registered immediately before its dispenso
// counterpart and is compiled out when BENCHMARK_WITHOUT_TBB is defined. Every benchmark is
// configured to report in microseconds and to use real (wall-clock) time.

// Flat batch of tasks, scheduler/pool created once.
#if !defined(BENCHMARK_WITHOUT_TBB)
BENCHMARK(BM_tbb)->Apply(CustomArguments)->Unit(benchmark::kMicrosecond)->UseRealTime();
#endif // !BENCHMARK_WITHOUT_TBB
BENCHMARK(BM_dispenso)->Apply(CustomArguments)->Unit(benchmark::kMicrosecond)->UseRealTime();
// Nested task groups/sets, scheduler/pool created per iteration.
#if !defined(BENCHMARK_WITHOUT_TBB)
BENCHMARK(BM_tbb2)->Apply(CustomArguments)->Unit(benchmark::kMicrosecond)->UseRealTime();
#endif // !BENCHMARK_WITHOUT_TBB
BENCHMARK(BM_dispenso2)->Apply(CustomArguments)->Unit(benchmark::kMicrosecond)->UseRealTime();
// Serial chain of self-rescheduling tasks.
#if !defined(BENCHMARK_WITHOUT_TBB)
BENCHMARK(BM_tbb_mostly_idle)->Apply(CustomArguments)->Unit(benchmark::kMicrosecond)->UseRealTime();
#endif // !BENCHMARK_WITHOUT_TBB
BENCHMARK(BM_dispenso_mostly_idle)
->Apply(CustomArguments)
->Unit(benchmark::kMicrosecond)
->UseRealTime();
// Two empty tasks around a 100 ms sleep; only the thread count is varied.
#if !defined(BENCHMARK_WITHOUT_TBB)
BENCHMARK(BM_tbb_very_idle)
->Apply(CustomArgumentsVeryIdle)
->Unit(benchmark::kMicrosecond)
->UseRealTime();
#endif // !BENCHMARK_WITHOUT_TBB
BENCHMARK(BM_dispenso_very_idle)
->Apply(CustomArgumentsVeryIdle)
->Unit(benchmark::kMicrosecond)
->UseRealTime();
// Expands to the program's main() for the benchmark runner.
BENCHMARK_MAIN();