void Runner::run()

in gloo/benchmark/runner.cc [272:358]


// Runs a single benchmark configuration from start to finish: builds one
// benchmark instance per thread, optionally verifies an initial run,
// executes warmup iterations, settles on an iteration count, and prints
// the resulting latency distribution.
//
// fn: factory invoked once per thread with a freshly created context;
//     returns the benchmark object to run.
// n:  passed to Benchmark::initialize() and to printDistribution().
void Runner::run(BenchmarkFn<T>& fn, size_t n) {
  std::vector<std::unique_ptr<Benchmark<T>>> benchmarks;

  // Initialize one set of objects for every thread
  for (auto i = 0; i < options_.threads; i++) {
    // Transport devices are assigned round-robin across threads
    auto context = contextFactory_->makeContext(
        transportDevices_[i % transportDevices_.size()]);
    context->base = options_.base;
    auto benchmark = fn(context);
    benchmark->initialize(n);

    // Switch pairs to sync mode if configured to do so
    if (options_.sync) {
      for (int j = 0; j < context->size; j++) {
        auto& pair = context->getPair(j);
        // Not every slot holds a pair; skip the empty ones
        if (pair) {
          pair->setSync(true, options_.busyPoll);
        }
      }
    }

    // Verify correctness of initial run
    if (options_.verify) {
      benchmark->run();
      benchmark->verify(mismatchErrors_);
      barrier_->run();
    }

    benchmarks.push_back(std::move(benchmark));
  }

  // Create and run warmup jobs for every thread
  Samples warmupResults = createAndRun(benchmarks, options_.warmupIterationCount);

  // Iterations is the number of samples we will get.
  // If none specified, it will calculate an initial
  // iteration count based on the iteration time
  // (default 2s) and median time spent during warmup iters.
  auto iterations = options_.iterationCount;
  if (iterations <= 0) {
    GLOO_ENFORCE_GT(
      options_.minIterationTimeNanos, 0,
      "Iteration time must be positive");
    // Sort warmup iteration times
    Distribution warmup(warmupResults);
    // Broadcast duration of median iteration during warmup,
    // so all nodes agree on the number of iterations to run for.
    auto nanos = broadcast(warmup.percentile(0.5));
    // Without a usable (positive) median there is no estimate to divide by:
    // dividing by zero is undefined behavior. Start from a single iteration
    // in that case and let the loop below grow the count. The decision is
    // made on the broadcast value, i.e. after the nodes have synchronized.
    iterations = 1;
    if (nanos > 0) {
      iterations = std::max(1L, options_.minIterationTimeNanos / nanos);
    }
  }

  Samples results;
  // Run the benchmark until results are significant enough to report
  while (true) {
    results = createAndRun(benchmarks, iterations);
    // If iteration count is explicitly specified by
    // user, report these results right away
    if (options_.iterationCount > 0) {
      break;
    }
    // Report these results if benchmark has run
    // for at least the minimum time
    auto totalNanos = results.sum() / options_.threads;
    if (totalNanos > options_.minIterationTimeNanos) {
      break;
    }
    // Stop if this run already used the maximum number of iterations
    if (iterations >= kMaxIterations) {
      break;
    }
    // Otherwise, increase the number of iterations again
    // and broadcast this value so all nodes agree on the
    // number of iterations to run for
    int nextIterations = static_cast<int>(kItersMultiplier * iterations);
    // When iterations is too small and multiplier has no effect,
    // just increment the number of iterations
    if (nextIterations <= iterations) {
      nextIterations++;
    }
    // Limit the number of iterations to kMaxIterations
    iterations = broadcast(std::min(nextIterations, kMaxIterations));
  }

  // Print results
  Distribution latency(results);
  printDistribution(n, sizeof(T), latency);
}