bench/BenchUtils.cc (182 lines of code) (raw):
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
#include "./BenchUtils.h"
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <random>
#include <type_traits>
#ifdef _OPENMP
#include <omp.h>
#endif
namespace fbgemm {
// Random engine shared by every randFill overload in this file. It is
// default-constructed (never explicitly seeded here), and each randFill call
// advances its state.
std::default_random_engine eng;
/**
 * Integral flavour of randFill, selected through the std::true_type tag.
 *
 * Every element of vec is overwritten, front to back, with a value drawn
 * uniformly from the closed range [low, high]. Sampling is done in int and
 * the result is converted to T on assignment.
 */
template <typename T>
void randFill(aligned_vector<T>& vec, T low, T high, std::true_type) {
  std::uniform_int_distribution<int> sampler(low, high);
  for (auto& slot : vec) {
    slot = static_cast<T>(sampler(eng));
  }
}
/**
 * Non-integral flavour of randFill, selected through the std::false_type tag.
 *
 * Every element of vec is overwritten, front to back, with a value drawn from
 * std::uniform_real_distribution<T>(low, high) using the shared engine.
 */
template <typename T>
void randFill(aligned_vector<T>& vec, T low, T high, std::false_type) {
  std::uniform_real_distribution<T> sampler(low, high);
  for (auto& slot : vec) {
    slot = sampler(eng);
  }
}
/**
 * Fills vec with random values between low and high.
 *
 * Forwards to the tagged overload that matches T: the integral one when
 * std::is_integral<T> is true, the real-valued one otherwise.
 */
template <typename T>
void randFill(aligned_vector<T>& vec, T low, T high) {
  using is_integral_tag = typename std::is_integral<T>::type;
  randFill(vec, low, high, is_integral_tag{});
}
// Explicit instantiations of the three-argument randFill for the element
// types float, uint8_t, int8_t and int. The template is defined in this
// source file, so these are the instantiations emitted from here.
template void
randFill<float>(aligned_vector<float>& vec, float low, float high);
template void
randFill<uint8_t>(aligned_vector<uint8_t>& vec, uint8_t low, uint8_t high);
template void
randFill<int8_t>(aligned_vector<int8_t>& vec, int8_t low, int8_t high);
template void randFill<int>(aligned_vector<int>& vec, int low, int high);
// int64_t is deliberately not instantiated from the generic template:
//   template void
//   randFill<int64_t>(aligned_vector<int64_t>& vec, int64_t low, int64_t high);
// The generic integral path samples through uniform_int_distribution<int>,
// so this specialization samples with a 64-bit distribution instead.
template <>
void randFill(aligned_vector<int64_t>& vec, int64_t low, int64_t high) {
  std::uniform_int_distribution<int64_t> sampler(low, high);
  for (auto& slot : vec) {
    slot = sampler(eng);
  }
}
/**
 * Reads and rewrites every byte of llc, adding one to each.
 *
 * The buffer is accessed through a volatile pointer so that each load and
 * store is actually performed. Judging by the name, benchmarks presumably
 * call this on a large buffer to push other data out of the last-level
 * cache between runs -- confirm against callers.
 *
 * @param llc buffer whose bytes are all incremented in place
 */
void llc_flush(std::vector<char>& llc) {
  volatile char* const base = llc.data();
  const size_t byte_count = llc.size();
  size_t pos = 0;
  while (pos < byte_count) {
    base[pos] = static_cast<char>(base[pos] + 1);
    ++pos;
  }
}
/**
 * Reports the maximum thread count.
 *
 * @return omp_get_max_threads() when built with OpenMP and without
 *         FBGEMM_MEASURE_TIME_BREAKDOWN; otherwise 1.
 */
int fbgemm_get_max_threads() {
  int max_threads = 1;
#if defined(_OPENMP) && !defined(FBGEMM_MEASURE_TIME_BREAKDOWN)
  max_threads = omp_get_max_threads();
#endif
  return max_threads;
}
/**
 * Reports the current thread count.
 *
 * @return omp_get_num_threads() when built with OpenMP and without
 *         FBGEMM_MEASURE_TIME_BREAKDOWN; otherwise 1.
 */
int fbgemm_get_num_threads() {
  int num_threads = 1;
#if defined(_OPENMP) && !defined(FBGEMM_MEASURE_TIME_BREAKDOWN)
  num_threads = omp_get_num_threads();
#endif
  return num_threads;
}
/**
 * Reports the calling thread's index.
 *
 * @return omp_get_thread_num() when built with OpenMP and without
 *         FBGEMM_MEASURE_TIME_BREAKDOWN; otherwise 0.
 */
int fbgemm_get_thread_num() {
  int thread_index = 0;
#if defined(_OPENMP) && !defined(FBGEMM_MEASURE_TIME_BREAKDOWN)
  thread_index = omp_get_thread_num();
#endif
  return thread_index;
}
/**
 * Looks for an integer-valued option on the command line.
 *
 * argv[1] .. argv[argc - 1] are scanned in order and the first entry that
 * contains arg as a substring decides the result; later entries are ignored.
 *
 * @param argc          number of entries in argv
 * @param argv          argument strings; argv[0] is never inspected
 * @param arg           text to search for, such as "--m="; a value is only
 *                      parsed when arg itself ends in '='
 * @param non_exist_val returned when no entry contains arg
 * @param def_val       returned when an entry contains arg but arg does not
 *                      end in '=' (the option is present without a value)
 * @return the std::atoi() conversion of the text following arg in the
 *         matching entry, or def_val / non_exist_val as described above
 */
int parseArgumentInt(
    int argc,
    const char* argv[],
    const char* arg,
    int non_exist_val,
    int def_val) {
  const std::size_t arg_len = std::strlen(arg);
  // A match always spells out arg itself, so whether a value follows depends
  // only on arg's last character. Testing arg directly (with a length guard)
  // also avoids reading one byte before the match when arg is empty.
  const bool expects_value = arg_len > 0 && arg[arg_len - 1] == '=';
  for (int i = 1; i < argc; ++i) {
    const char* match = std::strstr(argv[i], arg);
    if (match != nullptr) {
      return expects_value ? std::atoi(match + arg_len) : def_val;
    }
  }
  return non_exist_val;
}
/**
 * Tells whether a flag appears on the command line.
 *
 * @param argc    number of entries in argv
 * @param argv    argument strings; argv[0] is never inspected
 * @param arg     text to search for inside each argument (substring match)
 * @param def_val value returned when no argument contains arg
 * @return true as soon as some argv[i], i >= 1, contains arg; def_val
 *         otherwise
 */
bool parseArgumentBool(
    int argc,
    const char* argv[],
    const char* arg,
    bool def_val) {
  int idx = 1;
  while (idx < argc) {
    if (strstr(argv[idx], arg) != nullptr) {
      return true;
    }
    ++idx;
  }
  return def_val;
}
#if defined(USE_MKL)
/**
 * Error handler with an xerbla-style signature; only built when USE_MKL is
 * defined. It prints the offending routine and parameter position.
 *
 * @param srname name of the function that called xerbla
 * @param info   position of the invalid parameter in the parameter list
 * The third (unnamed) argument is the length of the name in bytes and is
 * not used.
 */
void test_xerbla(char* srname, const int* info, int /*len*/) {
  const int bad_param_pos = *info;
  printf("\nXERBLA(MKL Error) is called :%s: %d\n", srname, bad_param_pos);
}
#endif
/**
 * Builds a vector of `size` random floats in which roughly
 * `fractionNonZeros` of the entries are non-zero.
 *
 * Values are drawn from a uniform distribution over [0, 1) with a fixed seed
 * (345), so repeated calls with the same arguments give the same vector.
 * The smallest entries are then zeroed: the threshold is the numZeros-th
 * smallest value, and every entry less than or equal to it becomes 0 (so
 * ties with the threshold are zeroed as well).
 *
 * @param size             number of elements in the result
 * @param fractionNonZeros desired fraction of non-zero entries; values
 *                         outside [0, 1] (including NaN) are clamped into
 *                         that range instead of indexing out of bounds
 * @return the generated vector
 */
aligned_vector<float> getRandomSparseVector(
    unsigned size,
    float fractionNonZeros) {
  aligned_vector<float> res(size);
  std::mt19937 gen(345);
  std::uniform_real_distribution<double> dis(0.0, 1.0);
  for (auto& f : res) {
    f = dis(gen);
  }

  // Clamp the request; the comparison is written so that NaN maps to 0.
  const float fraction =
      fractionNonZeros > 0.0f ? std::min(fractionNonZeros, 1.0f) : 0.0f;
  // Keep the float arithmetic of the original rounding, but never let the
  // rounded count exceed size (float rounding of size can overshoot).
  const auto rounded =
      static_cast<unsigned long long>(std::round(size * fraction));
  const unsigned numNonZeros =
      static_cast<unsigned>(std::min<unsigned long long>(rounded, size));
  const unsigned numZeros = size - numNonZeros;

  if (numZeros > 0) {
    // Only the numZeros-th smallest value is needed, so a partial selection
    // on a scratch copy is enough; res keeps its original element order.
    aligned_vector<float> scratch(res);
    const auto nth = scratch.begin() + (numZeros - 1);
    std::nth_element(scratch.begin(), nth, scratch.end());
    const float thr = *nth;
    for (auto& f : res) {
      if (f <= thr) {
        f = 0.0f;
      }
    }
  }
  return res;
}
/**
 * Builds a Rows x Cols matrix, stored row by row, that is zero except for
 * randomly chosen rectangular blocks.
 *
 * The matrix is tiled into RowBlockSize x ColBlockSize blocks (blocks on the
 * bottom/right edge are truncated to fit). Blocks are visited row-block by
 * row-block; each one is selected with probability fractionNonZerosBlocks,
 * and every element of a selected block is set to an integer drawn
 * uniformly from [low, high]. The generator is seeded with the constant 345,
 * so the result is the same for the same arguments.
 *
 * @param Rows                   number of matrix rows
 * @param Cols                   number of matrix columns
 * @param fractionNonZerosBlocks probability that a block is filled
 * @param RowBlockSize           block height
 * @param ColBlockSize           block width
 * @param low                    smallest value written into a filled block
 * @param high                   largest value written into a filled block
 * @return the generated matrix as a flat vector of Rows * Cols elements
 */
template <typename T>
aligned_vector<T> getRandomBlockSparseMatrix(
    int Rows,
    int Cols,
    float fractionNonZerosBlocks,
    int RowBlockSize,
    int ColBlockSize,
    T low,
    T high) {
  aligned_vector<T> res(Rows * Cols, 0);
  std::mt19937 gen(345);
  std::uniform_int_distribution<int> dis(low, high);
  std::bernoulli_distribution bernDis{fractionNonZerosBlocks};

  // Number of blocks along each dimension, rounding up for partial blocks.
  const int rowBlocks = (Rows + RowBlockSize - 1) / RowBlockSize;
  const int colBlocks = (Cols + ColBlockSize - 1) / ColBlockSize;

  int fnzb = 0; // count of blocks that were filled (used by the debug output)
  for (int rb = 0; rb < rowBlocks; ++rb) {
    const int rowBegin = rb * RowBlockSize;
    const int rowEnd = std::min(rowBegin + RowBlockSize, Rows);
    for (int cb = 0; cb < colBlocks; ++cb) {
      if (!bernDis(gen)) {
        continue;
      }
      // fill in this block
      const int colBegin = cb * ColBlockSize;
      const int colEnd = std::min(colBegin + ColBlockSize, Cols);
      for (int r = rowBegin; r < rowEnd; ++r) {
        for (int c = colBegin; c < colEnd; ++c) {
          res[r * Cols + c] = dis(gen);
        }
      }
      ++fnzb;
    }
  }
  // std::cout << "Requested non-zero fraction: " << fractionNonZerosBlocks
  // << " , generated non-zero fraction: "
  // << static_cast<float>(fnzb) / rowBlocks / colBlocks << std::endl;
  // std::cout << "Requested non-zero blocks: "
  // << rowBlocks * colBlocks * fractionNonZerosBlocks
  // << ", generated non-zero blocks: " << fnzb << std::endl;
  return res;
}
// Explicit instantiations of getRandomBlockSparseMatrix for the element
// types uint8_t, int8_t and int32_t. The template is defined in this source
// file, so these are the instantiations emitted from here.
template aligned_vector<uint8_t> getRandomBlockSparseMatrix(
int Rows,
int Cols,
float fractionNonZerosBlocks,
int RowBlockSize,
int ColBlockSize,
uint8_t low,
uint8_t high);
template aligned_vector<int8_t> getRandomBlockSparseMatrix(
int Rows,
int Cols,
float fractionNonZerosBlocks,
int RowBlockSize,
int ColBlockSize,
int8_t low,
int8_t high);
template aligned_vector<int32_t> getRandomBlockSparseMatrix(
int Rows,
int Cols,
float fractionNonZerosBlocks,
int RowBlockSize,
int ColBlockSize,
int32_t low,
int32_t high);
} // namespace fbgemm