in libkineto/src/EventProfilerController.cpp [273:421]
// Run loop of the event profiler for the device owned by profiler_.
//
// Returns immediately (after logging) if enableForDevice() rejects the current
// config or if profiler_->setContinuousMode() fails. Otherwise it loops until
// stopRunloop_ becomes true or profiler_->configure() throws.
//
// Each iteration:
//   1. signals the heartbeat monitor,
//   2. picks up base-config and on-demand-config changes and reconfigures,
//   3. sleeps until the next sample deadline and collects a sample,
//   4. emits the periodic / on-demand reports that are due,
//   5. switches to the next counter set if multiplexing is enabled and due.
// If any stage finishes after the following sample deadline, the lateness is
// reported via reportLateSample() and the schedule is restarted from "now".
void EventProfilerController::profilerLoop() {
// We limit the number of profilers that can exist per GPU
// Working copy of the base config; replaced inside the loop whenever
// configLoader_ reports a newer one.
auto config = configLoader_.getConfigCopy();
if (!enableForDevice(*config)) {
VLOG(0) << "Not starting EventProfiler - profilers for GPU "
<< profiler_->device() << " exceeds profilers per GPU limit ("
<< config->maxEventProfilersPerGpu() << ")";
return;
}
// Bail out if the profiler cannot be put into continuous mode on this device.
if (!profiler_->setContinuousMode()) {
VLOG(0) << "Continuous mode not supported for GPU "
<< profiler_->device() << ". Not starting Event Profiler.";
return;
}
VLOG(0) << "Starting Event Profiler for GPU " << profiler_->device();
setThreadName("CUPTI Event Profiler");
// Deadlines for the next occurrence of each periodic action. They are
// assigned in the "restart" block below before first use.
time_point<system_clock> next_sample_time;
time_point<system_clock> next_report_time;
time_point<system_clock> next_on_demand_report_time;
time_point<system_clock> next_multiplex_time;
// On-demand config currently in effect; nullptr when there is none.
std::unique_ptr<Config> on_demand_config = nullptr;
// reconfigure: profiler_->configure() must be called before sampling.
// restart: deadlines must be recomputed and a throwaway sample collected.
// Both start out true so that the first iteration performs both steps.
bool reconfigure = true;
bool restart = true;
// Running counters, used only to number the VLOG(1) report messages.
int report_count = 0;
int on_demand_report_count = 0;
while (!stopRunloop_) {
heartbeatMonitor_.profilerHeartbeat();
// Pick up a changed base config and force a reconfigure.
if (configLoader_.hasNewConfig(*config)) {
config = configLoader_.getConfigCopy();
VLOG(0) << "Base config changed";
report_count = 0;
reconfigure = true;
}
auto now = system_clock::now();
// The on-demand window (start time + duration) has elapsed: drop the
// on-demand config so the next configure() call receives nullptr for it.
if (on_demand_config &&
now > (on_demand_config->eventProfilerOnDemandStartTime() +
on_demand_config->eventProfilerOnDemandDuration())) {
on_demand_config = nullptr;
LOG(INFO) << "On-demand profiling complete";
reconfigure = true;
}
// A pending on-demand config is only taken while the profiler reports no
// active on-demand session. mutex_ is held while newOnDemandConfig_ is
// inspected and moved from.
// NOTE(review): presumably newOnDemandConfig_ is set from another thread
// under the same mutex - confirm against the setter.
if (!profiler_->isOnDemandActive()) {
std::lock_guard<std::mutex> lock(mutex_);
if (newOnDemandConfig_) {
VLOG(0) << "Received on-demand config, reconfiguring";
on_demand_config = std::move(newOnDemandConfig_);
reconfigure = true;
on_demand_report_count = 0;
}
}
// Apply the (possibly updated) base + on-demand configuration.
if (reconfigure) {
try {
profiler_->configure(*config, on_demand_config.get());
} catch (const std::exception& ex) {
LOG(ERROR) << "Encountered error while configuring event profiler: "
<< ex.what();
// Exit profiling entirely when encountering an error here
// as it indicates a serious problem or bug.
break;
}
configureHeartbeatMonitor(
heartbeatMonitor_, *config, on_demand_config.get());
reconfigure = false;
// A new configuration always restarts the sampling schedule.
restart = true;
}
// (Re)start the schedule: every deadline is set relative to the current
// time, using the periods reported by the profiler.
if (restart) {
now = system_clock::now();
next_sample_time = now + profiler_->samplePeriod();
next_report_time = now + profiler_->reportPeriod();
if (profiler_->isOnDemandActive()) {
next_on_demand_report_time = now + profiler_->onDemandReportPeriod();
}
next_multiplex_time = now + profiler_->multiplexPeriod();
// Collect an initial sample and throw it away
// The next sample is the first valid one
profiler_->collectSample();
profiler_->clearSamples();
restart = false;
}
// Sleep until the sample deadline. The clock is re-read after every sleep
// and the wait repeats while the deadline has not been reached.
auto start_sleep = now;
while (now < next_sample_time) {
/* sleep override */
std::this_thread::sleep_for(next_sample_time - now);
now = system_clock::now();
}
// Time spent waiting, in milliseconds (only used for late-sample reports).
int sleep_time = duration_cast<milliseconds>(now - start_sleep).count();
auto start_sample = now;
profiler_->collectSample();
now = system_clock::now();
// Time spent collecting the sample, in milliseconds.
int sample_time = duration_cast<milliseconds>(now - start_sample).count();
// Advance to the following sample deadline. If it has already passed,
// report the late sample, skip reporting/multiplexing for this iteration
// and restart the schedule on the next one.
next_sample_time += profiler_->samplePeriod();
if (now > next_sample_time) {
reportLateSample(sleep_time, sample_time, 0, 0);
restart = true;
continue;
}
// Reporting stage: emit the periodic report and, while on-demand profiling
// is active, the on-demand report, whenever their deadlines have passed.
auto start_report = now;
if (now > next_report_time) {
VLOG(1) << "Report #" << report_count++;
profiler_->reportSamples();
next_report_time += profiler_->reportPeriod();
}
if (profiler_->isOnDemandActive() && now > next_on_demand_report_time) {
VLOG(1) << "OnDemand Report #" << on_demand_report_count++;
profiler_->reportOnDemandSamples();
next_on_demand_report_time += profiler_->onDemandReportPeriod();
}
// Called on every iteration, whether or not a report was emitted above.
profiler_->eraseReportedSamples();
now = system_clock::now();
// Time spent in the reporting stage, in milliseconds.
int report_time = duration_cast<milliseconds>(now - start_report).count();
// Reporting overran the next sample deadline: report and restart.
if (now > next_sample_time) {
reportLateSample(sleep_time, sample_time, report_time, 0);
restart = true;
continue;
}
// Multiplexing stage: switch to the next counter set when enabled and due.
auto start_multiplex = now;
if (profiler_->multiplexEnabled() && now > next_multiplex_time) {
profiler_->enableNextCounterSet();
next_multiplex_time += profiler_->multiplexPeriod();
}
now = system_clock::now();
// Time spent in the multiplexing stage, in milliseconds.
int multiplex_time =
duration_cast<milliseconds>(now - start_multiplex).count();
// Last lateness check of the iteration. There is no 'continue' here, so
// the execution-time message below is still logged in the late case.
if (now > next_sample_time) {
reportLateSample(sleep_time, sample_time, report_time, multiplex_time);
restart = true;
}
// Elapsed time from the start of sample collection to this point; the
// preceding sleep is not included.
VLOG(0) << "Runloop execution time: "
<< duration_cast<milliseconds>(now - start_sample).count() << "ms";
}
VLOG(0) << "Device " << profiler_->device()
<< ": Exited event profiling loop";
}