void Graph::PrintTimeProfiling()

in src/core/scheduler/scheduler.cc [240:295]


void Graph::PrintTimeProfiling() {
  std::stringstream ss;

  // verbosity level: 1 -> forward and backward propagation time
  if (device_->verbosity() == 1) {
    bool forward = true;
    float forward_time = 0;
    float backward_time = 0;
    float time_elapsed;

    for (size_t i = 0; i < nodes_.size(); ++i)
      if (nodes_[i]->time_elapsed() > 0) {
        if (forward == true)
          // check the op of cross entropy backward, after that are backward ops
          // note that the function is more accurate when either
          // SoftmaxCrossEntropy or Softmax is used
          if (nodes_[i]->op_name().find("Backward") != std::string::npos)
            forward = false;
        // when forward becomes false, it starts the backward propagation

        time_elapsed = (nodes_[i]->time_elapsed()) /
                       (iteration_ - device_->skip_iteration());

        if (forward == true) forward_time += time_elapsed;
      }

    backward_time = (time_elapsed_ / (iteration_ - device_->skip_iteration())) -
                    forward_time;

    ss << std::endl << "Time Profiling:" << std::endl;
    ss << "Forward Propagation Time : " << forward_time << " sec" << std::endl;
    ss << "Backward Propagation Time : " << backward_time << " sec"
       << std::endl;
  }

  // verbosity level: 2 -> each operation time (OP_ID, operation name, time)
  if (device_->verbosity() == 2) {
    ss << std::endl << "Time Profiling:" << std::endl;
    for (size_t i = 0; i < nodes_.size(); ++i)
      if (nodes_[i]->time_elapsed() > 0)
        ss << "OP_ID" << nodes_[i]->id_ << ". " << nodes_[i]->op_name() << " : "
           << (nodes_[i]->time_elapsed()) / (iteration_) << " sec" << std::endl;
  }

  // verbosity level: 3 -> Distributed training operations
  if (device_->verbosity() == 3) {
    ss << std::endl << "Time Profiling:" << std::endl;
    for (size_t i = 0; i < nodes_.size(); ++i)
      if ((nodes_[i]->op_name().find("Dist") != std::string::npos) &&
          (nodes_[i]->time_elapsed() > 0))
        ss << "OP_ID" << nodes_[i]->id_ << ". " << nodes_[i]->op_name() << " : "
           << (nodes_[i]->time_elapsed()) / (iteration_) << " sec" << std::endl;
  }

  printf("%s", ss.str().c_str());
}