in gloo/benchmark/runner.cc [272:358]
// Runs one benchmark configuration end to end and prints the latency
// distribution of its final run.
//
// Steps, in order:
//   1. Build one context and one benchmark instance per configured thread,
//      optionally switching pairs to sync mode and verifying an initial run.
//   2. Run the warmup iterations.
//   3. Choose the iteration count: options_.iterationCount if positive,
//      otherwise an estimate derived from the median warmup iteration time.
//   4. Run the benchmark, growing the iteration count until a run lasts
//      longer than options_.minIterationTimeNanos or the count reaches
//      kMaxIterations. With an explicit iteration count, run exactly once.
//   5. Print the distribution of the last run.
//
// fn: factory called once per thread with a fresh context; returns the
//     benchmark instance for that thread.
// n:  size argument forwarded to Benchmark::initialize() and
//     printDistribution() (presumably the element count -- confirm against
//     callers).
//
// GLOO_ENFORCE_GT rejects a non-positive options_.minIterationTimeNanos when
// the iteration count has to be derived automatically.
void Runner::run(BenchmarkFn<T>& fn, size_t n) {
  std::vector<std::unique_ptr<Benchmark<T>>> benchmarks;

  // Initialize one set of objects for every thread
  for (auto i = 0; i < options_.threads; i++) {
    // Transport devices are assigned round-robin across threads
    auto context = contextFactory_->makeContext(
        transportDevices_[i % transportDevices_.size()]);
    context->base = options_.base;
    auto benchmark = fn(context);
    benchmark->initialize(n);

    // Switch pairs to sync mode if configured to do so
    if (options_.sync) {
      for (int j = 0; j < context->size; j++) {
        auto& pair = context->getPair(j);
        // Some slots may hold no pair; skip those
        if (pair) {
          pair->setSync(true, options_.busyPoll);
        }
      }
    }

    // Verify correctness of initial run
    if (options_.verify) {
      benchmark->run();
      benchmark->verify(mismatchErrors_);
      // NOTE(review): presumably keeps all participants in step after
      // verification -- confirm against the barrier's definition
      barrier_->run();
    }

    benchmarks.push_back(std::move(benchmark));
  }

  // Create and run warmup jobs for every thread
  Samples warmupResults =
      createAndRun(benchmarks, options_.warmupIterationCount);

  // Iterations is the number of samples we will get.
  // If none specified, it will calculate an initial
  // iteration count based on the minimum iteration time
  // and the median time spent during warmup iterations.
  auto iterations = options_.iterationCount;
  if (iterations <= 0) {
    GLOO_ENFORCE_GT(
        options_.minIterationTimeNanos, 0,
        "Iteration time must be positive");

    // Build a distribution over the warmup samples to query the median
    Distribution warmup(warmupResults);

    // Broadcast duration of median iteration during warmup,
    // so all nodes agree on the number of iterations to run for.
    auto nanos = broadcast(warmup.percentile(0.5));

    // A zero median would make the division below undefined behaviour
    // (integer division by zero). Without a usable estimate, start at a
    // single iteration and let the loop below grow the count. The value
    // tested here is the broadcast one, so every node takes the same branch.
    if (nanos > 0) {
      iterations = std::max(1L, options_.minIterationTimeNanos / nanos);
    } else {
      iterations = 1;
    }
  }

  Samples results;

  // Run the benchmark until results are significant enough to report
  while (true) {
    results = createAndRun(benchmarks, iterations);

    // If iteration count is explicitly specified by
    // user, report these results right away
    if (options_.iterationCount > 0) {
      break;
    }

    // Report these results if benchmark has run
    // for at least the minimum time
    auto totalNanos = results.sum() / options_.threads;
    if (totalNanos > options_.minIterationTimeNanos) {
      break;
    }

    // Stop if this run already used the maximum number of iterations
    if (iterations >= kMaxIterations) {
      break;
    }

    // Otherwise, increase the number of iterations again
    // and broadcast this value so all nodes agree on the
    // number of iterations to run for
    int nextIterations = static_cast<int>(kItersMultiplier * iterations);

    // When iterations is too small and multiplier has no effect,
    // just increment the number of iterations
    if (nextIterations <= iterations) {
      nextIterations++;
    }

    // Limit the number of iterations to kMaxIterations
    iterations = broadcast(std::min(nextIterations, kMaxIterations));
  }

  // Print results
  Distribution latency(results);
  printDistribution(n, sizeof(T), latency);
}