in src/core/scheduler/scheduler.cc [240:295]
void Graph::PrintTimeProfiling() {
std::stringstream ss;
// verbosity level: 1 -> forward and backward propagation time
if (device_->verbosity() == 1) {
bool forward = true;
float forward_time = 0;
float backward_time = 0;
float time_elapsed;
for (size_t i = 0; i < nodes_.size(); ++i)
if (nodes_[i]->time_elapsed() > 0) {
if (forward == true)
// check the op of cross entropy backward, after that are backward ops
// note that the function is more accurate when either
// SoftmaxCrossEntropy or Softmax is used
if (nodes_[i]->op_name().find("Backward") != std::string::npos)
forward = false;
// when forward becomes false, it starts the backward propagation
time_elapsed = (nodes_[i]->time_elapsed()) /
(iteration_ - device_->skip_iteration());
if (forward == true) forward_time += time_elapsed;
}
backward_time = (time_elapsed_ / (iteration_ - device_->skip_iteration())) -
forward_time;
ss << std::endl << "Time Profiling:" << std::endl;
ss << "Forward Propagation Time : " << forward_time << " sec" << std::endl;
ss << "Backward Propagation Time : " << backward_time << " sec"
<< std::endl;
}
// verbosity level: 2 -> each operation time (OP_ID, operation name, time)
if (device_->verbosity() == 2) {
ss << std::endl << "Time Profiling:" << std::endl;
for (size_t i = 0; i < nodes_.size(); ++i)
if (nodes_[i]->time_elapsed() > 0)
ss << "OP_ID" << nodes_[i]->id_ << ". " << nodes_[i]->op_name() << " : "
<< (nodes_[i]->time_elapsed()) / (iteration_) << " sec" << std::endl;
}
// verbosity level: 3 -> Distributed training operations
if (device_->verbosity() == 3) {
ss << std::endl << "Time Profiling:" << std::endl;
for (size_t i = 0; i < nodes_.size(); ++i)
if ((nodes_[i]->op_name().find("Dist") != std::string::npos) &&
(nodes_[i]->time_elapsed() > 0))
ss << "OP_ID" << nodes_[i]->id_ << ". " << nodes_[i]->op_name() << " : "
<< (nodes_[i]->time_elapsed()) / (iteration_) << " sec" << std::endl;
}
printf("%s", ss.str().c_str());
}