in libkineto/src/CuptiActivityProfiler.cpp [394:476]
void CuptiActivityProfiler::configure(
const Config& config,
const time_point<system_clock>& now) {
std::lock_guard<std::mutex> guard(mutex_);
if (isActive()) {
LOG(ERROR) << "CuptiActivityProfiler already busy, terminating";
return;
}
config_ = config.clone();
if (config_->activitiesDuration().count() == 0) {
// Use default if not specified
config_->setActivitiesDuration(
config_->activitiesDurationDefault());
}
profileStartTime_ = config_->requestTimestamp();
if (profileStartTime_ < now) {
LOG(ERROR) << "Not starting tracing - start timestamp is in the past. Time difference (ms): " << duration_cast<milliseconds>(now - profileStartTime_).count();
return;
} else if ((profileStartTime_ - now) < config_->activitiesWarmupDuration()) {
LOG(ERROR) << "Not starting tracing - insufficient time for warmup. Time to warmup (ms): " << duration_cast<milliseconds>(profileStartTime_ - now).count() ;
return;
}
if (LOG_IS_ON(INFO)) {
config_->printActivityProfilerConfig(LIBKINETO_DBG_STREAM);
}
if (!cpuOnly_ && !libkineto::api().client()) {
LOG(INFO) << "GPU-only tracing for "
<< config_->activitiesDuration().count() << "ms";
}
// Ensure we're starting in a clean state
resetTraceData();
#if !USE_GOOGLE_LOG
// Add a LoggerObserverCollector to collect all logs during the trace.
loggerCollectorMetadata_ = std::make_unique<LoggerCollector>();
Logger::addLoggerObserver(loggerCollectorMetadata_.get());
#endif // !USE_GOOGLE_LOG
#if defined(HAS_CUPTI) || defined(HAS_ROCTRACER)
if (!cpuOnly_) {
// Enabling CUPTI activity tracing incurs a larger perf hit at first,
// presumably because structures are allocated and initialized, callbacks
// are activated etc. After a while the overhead decreases and stabilizes.
// It's therefore useful to perform some warmup before starting recording.
LOG(INFO) << "Enabling GPU tracing";
cupti_.setMaxBufferSize(config_->activitiesMaxGpuBufferSize());
time_point<system_clock> timestamp;
if (VLOG_IS_ON(1)) {
timestamp = system_clock::now();
}
#ifdef HAS_CUPTI
cupti_.enableCuptiActivities(config_->selectedActivityTypes());
#else
cupti_.enableActivities(config_->selectedActivityTypes());
#endif
if (VLOG_IS_ON(1)) {
auto t2 = system_clock::now();
addOverheadSample(
setupOverhead_, duration_cast<microseconds>(t2 - timestamp).count());
}
}
#endif // HAS_CUPTI || HAS_ROCTRACER
if (profilers_.size() > 0) {
configureChildProfilers();
}
if (libkineto::api().client()) {
libkineto::api().client()->warmup(config_->isOpInputsCollectionEnabled());
}
LOG(INFO) << "Tracing starting in "
<< duration_cast<seconds>(profileStartTime_ - now).count() << "s";
traceBuffers_ = std::make_unique<ActivityBuffers>();
captureWindowStartTime_ = captureWindowEndTime_ = 0;
currentRunloopState_ = RunloopState::Warmup;
}