benchmarks/simple_for

// Copyright (c) Facebook, Inc. and its affiliates. // // This source code is licensed under the MIT license found in the // LICENSE.md file in the root directory of this source tree. #include <dispenso/parallel_for.h> #if defined(_OPENMP) #include <omp.h> #endif #include <unordered_map> #if !defined(BENCHMARK_WITHOUT_TBB) #include "tbb/blocked_range.h" #include "tbb/parallel_for.h" #include "tbb/task_scheduler_init.h" #endif // !BENCHMARK_WITHOUT_TBB #include "thread_benchmark_common.h" static uint32_t kSeed(8); static constexpr int kSmallSize = 1000; static constexpr int kMediumSize = 1000000; static constexpr int kLargeSize = 100000000; const std::vector<int>& getInputs(int num_elements) { static std::unordered_map<int, std::vector<int>> vecs; auto it = vecs.find(num_elements); if (it != vecs.end()) { return it->second; } // No need to use a high-quality rng for this test. srand(kSeed); std::vector<int> values; values.reserve(num_elements); for (int i = 0; i < num_elements; ++i) { values.push_back((rand() & 255) - 127); } auto res = vecs.emplace(num_elements, std::move(values)); assert(res.second); return res.first->second; } template <int num_elements> void BM_serial(benchmark::State& state) { std::vector<int> output(num_elements, 0); auto& input = getInputs(num_elements); for (auto UNUSED_VAR : state) { for (size_t i = 0; i < num_elements; ++i) { output[i] = input[i] * input[i] - 3 * input[i]; } } } void checkResults(const std::vector<int>& input, const std::vector<int>& output) { for (size_t i = 0; i < input.size(); ++i) { if (output[i] != input[i] * input[i] - 3 * input[i]) { std::cerr << "FAIL! " << output[i] << " vs " << input[i] * input[i] - 3 * input[i] << std::endl; abort(); } } } void BM_dispenso(benchmark::State& state) { const int num_threads = state.range(0); const int num_elements = state.range(1); std::vector<int> output(num_elements, 0); dispenso::ThreadPool pool(num_threads); auto& input = getInputs(num_elements); for (auto UNUSED_VAR : state) { dispenso::TaskSet tasks(pool); dispenso::parallel_for(tasks, 0, num_elements, [&input, &output](size_t i) { output[i] = input[i] * input[i] - 3 * input[i]; }); } checkResults(input, output); } #if defined(_OPENMP) void BM_omp(benchmark::State& state) { const int num_threads = state.range(0); const int num_elements = state.range(1); std::vector<int> output(num_elements, 0); omp_set_num_threads(num_threads); auto& input = getInputs(num_elements); for (auto UNUSED_VAR : state) { #pragma omp parallel for for (int i = 0; i < num_elements; ++i) { output[i] = input[i] * input[i] - 3 * input[i]; } } checkResults(input, output); } #endif /*defined(_OPENMP)*/ #if !defined(BENCHMARK_WITHOUT_TBB) void BM_tbb(benchmark::State& state) { const int num_threads = state.range(0); const int num_elements = state.range(1); std::vector<int> output(num_elements, 0); auto& input = getInputs(num_elements); for (auto UNUSED_VAR : state) { tbb::task_scheduler_init initsched(num_threads); tbb::parallel_for( tbb::blocked_range<size_t>(0, num_elements), [&input, &output](const tbb::blocked_range<size_t>& r) { for (size_t i = r.begin(); i < r.end(); ++i) { output[i] = input[i] * input[i] - 3 * input[i]; } }); } checkResults(input, output); } #endif // !BENCHMARK_WITHOUT_TBB static void CustomArguments(benchmark::internal::Benchmark* b) { for (int j : {kSmallSize, kMediumSize, kLargeSize}) { for (int i : pow2HalfStepThreads()) { b->Args({i, j}); } } } BENCHMARK_TEMPLATE(BM_serial, kSmallSize); BENCHMARK_TEMPLATE(BM_serial, kMediumSize); BENCHMARK_TEMPLATE(BM_serial, kLargeSize); #if defined(_OPENMP) BENCHMARK(BM_omp)->Apply(CustomArguments)->UseRealTime(); #endif // OPENMP #if !defined(BENCHMARK_WITHOUT_TBB) BENCHMARK(BM_tbb)->Apply(CustomArguments)->UseRealTime(); #endif // !BENCHMARK_WITHOUT_TBB BENCHMARK(BM_dispenso)->Apply(CustomArguments)->UseRealTime(); BENCHMARK_MAIN();

benchmarks/simple_for_benchmark.cpp (117 lines of code) (raw):