in libkineto/src/CuptiActivityProfiler.cpp [530:628]
// Executes one step of the profiler run loop, dispatching on
// currentRunloopState_:
//
//  - WaitForRequest: nothing to do.
//  - Warmup: (GPU builds only) clears pending activities while the start time
//    is still further away than the next wakeup, and stops + resets the
//    profiler if cupti_.stopCollection is set. Once `now` has reached
//    profileStartTime_, tracing is started and the registered client (if any)
//    is notified.
//  - CollectTrace: recomputes profileEndTime_ from captureWindowStartTime_ and
//    the configured duration, then stops tracing if the end time has been
//    reached or a stop was requested.
//  - ProcessTrace: processes the collected trace and resets the profiler.
//
// Parameters:
//   now            - the time at which this step is being executed.
//   nextWakeupTime - the time at which the caller currently intends to run
//                    the next step.
//
// Returns the time at which the next step should run: nextWakeupTime, or
// profileStartTime_ / profileEndTime_ when that boundary falls before the
// planned wakeup.
const time_point<system_clock> CuptiActivityProfiler::performRunLoopStep(
    const time_point<system_clock>& now,
    const time_point<system_clock>& nextWakeupTime) {
  auto new_wakeup_time = nextWakeupTime;
  switch (currentRunloopState_) {
    case RunloopState::WaitForRequest:
      // Nothing to do
      break;

    case RunloopState::Warmup:
      VLOG(1) << "State: Warmup";
#if defined(HAS_CUPTI) || defined(HAS_ROCTRACER)
      // Flushing can take a while so avoid doing it close to the start time
      if (!cpuOnly_ && nextWakeupTime < profileStartTime_) {
        cupti_.clearActivities();
      }

      if (cupti_.stopCollection) {
        // Collection was flagged to stop before tracing even started:
        // stop the trace and reset the profiler while holding the lock.
        LOG(WARNING) << "Trace terminated during warmup";
        std::lock_guard<std::mutex> guard(mutex_);
        stopTraceInternal(now);
        resetInternal();
        VLOG(0) << "Warmup -> WaitForRequest";
        break;
      }
#endif // HAS_CUPTI || HAS_ROCTRACER

      if (now >= profileStartTime_) {
        UST_LOGGER_MARK_COMPLETED(kWarmUpStage);
        // Allow 10ms of slack before reporting a late start.
        if (now > profileStartTime_ + milliseconds(10)) {
          LOG(WARNING)
              << "Tracing started "
              << duration_cast<milliseconds>(now - profileStartTime_).count()
              << "ms late!";
        } else {
          LOG(INFO) << "Tracing started";
        }
        startTrace(now);
        if (libkineto::api().client()) {
          libkineto::api().client()->start();
        }
        // Do not sleep past the end of the profiling window.
        if (nextWakeupTime > profileEndTime_) {
          new_wakeup_time = profileEndTime_;
        }
      } else if (nextWakeupTime > profileStartTime_) {
        // Wake up exactly at the start time rather than after it.
        new_wakeup_time = profileStartTime_;
      }
      break;

    case RunloopState::CollectTrace: {
      VLOG(1) << "State: CollectTrace";
      // captureWindowStartTime_ can be set by external threads,
      // so recompute end time.
      // FIXME: Is this a good idea for synced start?
      {
        std::lock_guard<std::mutex> guard(mutex_);
        profileEndTime_ = time_point<system_clock>(
                              microseconds(captureWindowStartTime_)) +
            config_->activitiesDuration();
      }

      // Consume the stop request unconditionally. If the exchange were only
      // evaluated on the right-hand side of `||`, short-circuiting would skip
      // it whenever the end time has already been reached, leaving the flag
      // set for a later CollectTrace step to observe.
      const bool stopRequested = stopCollection_.exchange(false);
      bool collectionDone = stopRequested || now >= profileEndTime_;
#if defined(HAS_CUPTI) || defined(HAS_ROCTRACER)
      collectionDone = collectionDone || cupti_.stopCollection;
#endif // HAS_CUPTI || HAS_ROCTRACER

      if (collectionDone) {
        LOG(INFO) << "Tracing complete";
        // FIXME: Need to communicate reason for stopping on errors
        // The client is notified before mutex_ is acquired.
        if (libkineto::api().client()) {
          libkineto::api().client()->stop();
        }
        std::lock_guard<std::mutex> guard(mutex_);
        stopTraceInternal(now);
        VLOG_IF(0, now >= profileEndTime_) << "Reached profile end time";
        UST_LOGGER_MARK_COMPLETED(kCollectionStage);
      } else if (profileEndTime_ < nextWakeupTime) {
        // Reaching this branch implies now < profileEndTime_, so only the
        // wakeup time needs checking: wake up at the end of the window.
        new_wakeup_time = profileEndTime_;
      }
      break;
    }

    case RunloopState::ProcessTrace: {
      VLOG(1) << "State: ProcessTrace";
      // FIXME: Probably want to allow interruption here
      // for quickly handling trace request via synchronous API
      // The lock lives in its own scope so that its initialization cannot be
      // bypassed by a jump to any case label added after this one.
      std::lock_guard<std::mutex> guard(mutex_);
      processTraceInternal(*logger_);
      UST_LOGGER_MARK_COMPLETED(kPostProcessingStage);
      resetInternal();
      VLOG(0) << "ProcessTrace -> WaitForRequest";
      break;
    }
  }

  return new_wakeup_time;
}