in src/core/device/cuda_gpu.cc [128:169]
/// Runs `fn` on this device's context, bracketed by a pair of CUDA events
/// stored on `node` so the elapsed time can be queried afterwards.
///
/// `node->start_` and `node->end_` are created here on every call; the start
/// event is recorded before `fn(&ctx_)` is invoked and the end event after it.
/// Neither event is destroyed in this function.
///
/// Stream selection:
///  - Without USE_DIST, or when the op name does not contain "Dist", both
///    events are recorded on `ctx_.stream`.
///  - With USE_DIST and an op name containing "Dist", the stream is picked by
///    tag: "Dist_s" -> `ctx_.s`; "Dist_c1c2" -> start on `ctx_.c1`, end on
///    `ctx_.c2`; "Dist_c1" -> `ctx_.c1`; "Dist_c2" -> `ctx_.c2`.
///
/// \param fn        operation to execute; receives a pointer to `ctx_`.
/// \param executor  not used by this function.
/// \param node      graph node whose `start_` / `end_` events are (re)created
///                  and whose `op_name()` drives the stream selection.
void CudaGPU::TimeProfilingDoExec(function<void(Context*)>&& fn, int executor,
                                  Node* node) {
  // time profiling using cudaEvent
  // NOTE(review): return codes of the CUDA calls are ignored here, as in the
  // original code; consider wrapping them in the project's error-check macro.
  cudaEventCreate(&(node->start_));
  cudaEventCreate(&(node->end_));

  // Resolve, once, which stream each event is recorded on.
  auto start_stream = ctx_.stream;
  auto end_stream = ctx_.stream;
  bool record_events = true;

#ifdef USE_DIST
  const auto& op_name = node->op_name();
  auto has_tag = [&op_name](const char* tag) {
    return op_name.find(tag) != std::string::npos;
  };

  if (has_tag("Dist")) {
    if (has_tag("Dist_s")) {
      start_stream = ctx_.s;
      end_stream = ctx_.s;
    } else if (has_tag("Dist_c1c2")) {
      // Must be tested before "Dist_c1": every "Dist_c1c2" name also contains
      // "Dist_c1", so the opposite order makes this branch unreachable and
      // records the end event on c1 instead of c2.
      start_stream = ctx_.c1;
      end_stream = ctx_.c2;
    } else if (has_tag("Dist_c1")) {
      start_stream = ctx_.c1;
      end_stream = ctx_.c1;
    } else if (has_tag("Dist_c2")) {
      start_stream = ctx_.c2;
      end_stream = ctx_.c2;
    } else {
      // NOTE(review): a "Dist" op with no recognised tag records no events
      // (behaviour kept from the original); confirm that callers never query
      // elapsed time for such nodes.
      record_events = false;
    }
  }
#endif  // USE_DIST

  if (record_events) cudaEventRecord(node->start_, start_stream);

  fn(&ctx_);

  if (record_events) cudaEventRecord(node->end_, end_stream);
}