bench/RowOffsetBenchmark.cc (37 lines of code) (raw):
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <chrono>
#include <initializer_list>
#include <iomanip>
#include <iostream>
#ifdef _OPENMP
#include <omp.h>
#endif
#include "./BenchUtils.h"
#include "fbgemm/Fbgemm.h"
#include "src/OptimizedKernelsAvx2.h"
using namespace std;
using namespace fbgemm;
void performance_test() {
constexpr int NWARMUP = 4;
constexpr int NITER = 256;
cout << setw(4) << "len"
<< ", B_elements_per_sec" << endl;
for (int len : {1, 2, 3, 4, 5, 7, 8, 9, 15, 16, 17,
31, 32, 33, 63, 64, 65, 127, 128, 129, 255, 256}) {
aligned_vector<uint8_t> a(len);
double duration =
measureWithWarmup([&]() { reduceAvx2(a.data(), len); }, NWARMUP, NITER);
duration *= 1e9; // convert to ns
cout << setw(4) << len << ", " << setw(10) << setprecision(3)
<< len / duration << endl;
} // for each length
} // performance_test
int main() {
#ifdef _OPENMP
// Use 1 thread unless OMP_NUM_THREADS is explicit set.
const char* val = getenv("OMP_NUM_THREADS");
if (val == nullptr || !*val) {
omp_set_num_threads(1);
}
#endif
performance_test();
return 0;
}