void CudaGPU::TimeProfilingDoExec()

in src/core/device/cuda_gpu.cc [128:169]


// Runs `fn` on this device's context and brackets it with two CUDA events so
// the elapsed time of the node can be computed later.
//
// fn:       the operation to execute; it is invoked exactly once with &ctx_.
// executor: not used by this function.
// node:     graph node whose start_/end_ events are (re)created here and
//           recorded immediately before / after `fn` is invoked.
//
// When USE_DIST is defined, the stream each event is recorded on is picked
// from a tag in node->op_name():
//   "Dist_s"    -> start and end on ctx_.s
//   "Dist_c1c2" -> start on ctx_.c1, end on ctx_.c2
//   "Dist_c1"   -> start and end on ctx_.c1
//   "Dist_c2"   -> start and end on ctx_.c2
//   no "Dist"   -> start and end on ctx_.stream
// A name that contains "Dist" but none of the tags above gets no event
// recorded at all.
//
// NOTE(review): the return codes of cudaEventCreate/cudaEventRecord are
// ignored here; consider routing them through the project's CUDA error-check
// helper. Also confirm that start_/end_ are destroyed elsewhere before this
// function overwrites them on a repeated call for the same node.
void CudaGPU::TimeProfilingDoExec(function<void(Context*)>&& fn, int executor,
                                  Node* node) {
  // time profiling using cudaEvent
  cudaEventCreate(&(node->start_));
  cudaEventCreate(&(node->end_));

  // Resolve the streams once, up front, so the start and end records cannot
  // disagree about which branch applies.
  cudaStream_t start_stream = ctx_.stream;
  cudaStream_t end_stream = ctx_.stream;
  bool record_events = true;

#ifdef USE_DIST
  const auto& op_name = node->op_name();
  if (op_name.find("Dist") != std::string::npos) {
    if (op_name.find("Dist_s") != std::string::npos) {
      start_stream = ctx_.s;
      end_stream = ctx_.s;
    } else if (op_name.find("Dist_c1c2") != std::string::npos) {
      // Must be tested before "Dist_c1": "Dist_c1" is a substring of
      // "Dist_c1c2", so checking it first would shadow this branch and put
      // the end event on c1 instead of c2.
      start_stream = ctx_.c1;
      end_stream = ctx_.c2;
    } else if (op_name.find("Dist_c1") != std::string::npos) {
      start_stream = ctx_.c1;
      end_stream = ctx_.c1;
    } else if (op_name.find("Dist_c2") != std::string::npos) {
      start_stream = ctx_.c2;
      end_stream = ctx_.c2;
    } else {
      // Unrecognised "Dist" op: no stream is known for it, so no event is
      // recorded (the events stay created but unrecorded).
      record_events = false;
    }
  }
#endif  // USE_DIST

  if (record_events) cudaEventRecord(node->start_, start_stream);

  fn(&ctx_);

  if (record_events) cudaEventRecord(node->end_, end_stream);
}