in gloo/benchmark/cuda_main.cc [176:216]
void runBenchmark(options& x) {
  // Maps the benchmark name in `x.benchmark` to a factory callback that
  // builds the matching CUDA benchmark object, then passes that callback to
  // a Runner constructed from `x`.
  //
  // `T` is not declared inside this block; it is expected to be supplied by
  // the enclosing template declaration.
  //
  // Recognized names:
  //   cuda_broadcast_one_to_all
  //   cuda_allreduce_halving_doubling
  //   cuda_allreduce_halving_doubling_pipelined
  //   cuda_allreduce_bcube
  //   cuda_allreduce_ring_chunked
  //
  // Any other name leaves the factory empty, which trips the GLOO_ENFORCE
  // below with "Invalid algorithm: <name>".
  using BenchmarkPtr = std::unique_ptr<Benchmark<T>>;

  const auto& algorithm = x.benchmark;
  Runner::BenchmarkFn<T> factory;

  // Every factory captures `x` by reference; the callbacks are only handed
  // to the Runner created at the end of this function, while `x` is alive.
  if (algorithm == "cuda_broadcast_one_to_all") {
    factory = [&x](std::shared_ptr<Context>& ctx) -> BenchmarkPtr {
      using Impl = CudaBroadcastBenchmark<T, CudaBroadcastOneToAll<T>>;
      return BenchmarkPtr(new Impl(ctx, x));
    };
  } else if (algorithm == "cuda_allreduce_halving_doubling") {
    factory = [&x](std::shared_ptr<Context>& ctx) -> BenchmarkPtr {
      using Impl = CudaAllreduceBenchmark<T, CudaAllreduceHalvingDoubling<T>>;
      return BenchmarkPtr(new Impl(ctx, x));
    };
  } else if (algorithm == "cuda_allreduce_halving_doubling_pipelined") {
    factory = [&x](std::shared_ptr<Context>& ctx) -> BenchmarkPtr {
      using Impl =
          CudaAllreduceBenchmark<T, CudaAllreduceHalvingDoublingPipelined<T>>;
      return BenchmarkPtr(new Impl(ctx, x));
    };
  } else if (algorithm == "cuda_allreduce_bcube") {
    factory = [&x](std::shared_ptr<Context>& ctx) -> BenchmarkPtr {
      using Impl = CudaAllreduceBenchmark<T, CudaAllreduceBcube<T>>;
      return BenchmarkPtr(new Impl(ctx, x));
    };
  } else if (algorithm == "cuda_allreduce_ring_chunked") {
    factory = [&x](std::shared_ptr<Context>& ctx) -> BenchmarkPtr {
      using Impl = CudaAllreduceBenchmark<T, CudaAllreduceRingChunked<T>>;
      return BenchmarkPtr(new Impl(ctx, x));
    };
  }

  // No branch matched: the name is not a known CUDA benchmark.
  if (!factory) {
    GLOO_ENFORCE(false, "Invalid algorithm: ", x.benchmark);
  }

  Runner runner(x);
  runner.run(factory);
}