in benchmarks/rnnt/ootb/inference/loadgen/loadgen.cc [1183:1278]
// Runs the performance-only test mode.
//
// Steps, in order:
//   1. Logs a "Starting performance mode" detail message.
//   2. Generates the loadable sample sets and loads the first one into RAM.
//   3. Issues queries in TestMode::PerformanceOnly while timing the run with
//      both PerfClock and std::chrono::system_clock.
//   4. Logs an error if the two clocks differ by more than 1%, or a warning
//      if they differ by more than 0.1%.
//   5. Reports sample latencies to the SUT, logs the performance summary and
//      detail, and unloads the samples from RAM.
//
// Parameters:
//   sut          - system under test; receives the queries and the latency
//                  report.
//   qsl          - query sample library the performance set is loaded
//                  from / unloaded to.
//   settings     - test settings; `print_timestamps` enables the stdout
//                  timestamp/duration output.
//   sequence_gen - forwarded unchanged to IssueQueries.
//
// NOTE(review): `scenario` is not declared in this block; presumably it is a
// template parameter declared immediately above - confirm.
void RunPerformanceMode(SystemUnderTest* sut, QuerySampleLibrary* qsl,
                        const TestSettingsInternal& settings,
                        SequenceGen* sequence_gen) {
  LogDetail([](AsyncDetail& detail) {
#if USE_NEW_LOGGING_FORMAT
    MLPERF_LOG(detail, "generic_message", "Starting performance mode");
#else
    detail("Starting performance mode:");
#endif
  });

  // Use first loadable set as the performance set.
  // NOTE(review): front() assumes GenerateLoadableSets returned at least one
  // set - confirm that it can never return an empty vector.
  std::vector<loadgen::LoadableSampleSet> loadable_sets(
      loadgen::GenerateLoadableSets(qsl, settings));
  const LoadableSampleSet& performance_set = loadable_sets.front();
  LoadSamplesToRam(qsl, performance_set.set);

  // Start PerfClock/system_clock timers for measuring performance interval
  // for comparison vs external timer.
  auto pc_start_ts = PerfClock::now();
  auto sc_start_ts = std::chrono::system_clock::now();
  if (settings.print_timestamps) {
    std::cout << "Loadgen :: Perf mode start. system_clock Timestamp = "
              << std::chrono::system_clock::to_time_t(sc_start_ts) << "\n"
              << std::flush;
  }

  PerformanceResult pr(IssueQueries<scenario, TestMode::PerformanceOnly>(
      sut, settings, performance_set, sequence_gen));

  // Measure PerfClock/system_clock timer durations for comparison vs
  // external timer.
  auto pc_stop_ts = PerfClock::now();
  auto sc_stop_ts = std::chrono::system_clock::now();

  // Both durations are truncated to whole milliseconds before the ratio is
  // formed.
  auto pc_duration = std::chrono::duration_cast<std::chrono::milliseconds>(
                         pc_stop_ts - pc_start_ts)
                         .count();
  auto sc_duration = std::chrono::duration_cast<std::chrono::milliseconds>(
                         sc_stop_ts - sc_start_ts)
                         .count();
  // NOTE(review): sc_duration is 0 for a run shorter than 1 ms, which makes
  // this a floating-point division by zero - confirm that case cannot occur
  // or is acceptable.
  float pc_sc_ratio = static_cast<float>(pc_duration) / sc_duration;

  if (settings.print_timestamps) {
    std::cout << "Loadgen :: Perf mode stop. system_clock Timestamp = "
              << std::chrono::system_clock::to_time_t(sc_stop_ts) << "\n"
              << std::flush;
    std::cout << "Loadgen :: PerfClock Perf duration = " << pc_duration
              << "ms\n"
              << std::flush;
    std::cout << "Loadgen :: system_clock Perf duration = " << sc_duration
              << "ms\n"
              << std::flush;
    std::cout << "Loadgen :: PerfClock/system_clock ratio = " << std::fixed
              << std::setprecision(4) << pc_sc_ratio << "\n"
              << std::flush;
  }

  // Clock sanity check: more than 1% disagreement is an error, more than 0.1%
  // is only a warning.
  if (pc_sc_ratio > 1.01 || pc_sc_ratio < 0.99) {
    LogDetail([pc_sc_ratio](AsyncDetail& detail) {
#if USE_NEW_LOGGING_FORMAT
      std::stringstream ss;
      ss << "PerfClock and system_clock differ by more than 1%! "
         << " pc_sc_ratio: " << pc_sc_ratio;
      MLPERF_LOG_ERROR(detail, "error_runtime", ss.str());
#else
      detail.Error("PerfClock and system_clock differ by more than 1%! ",
                   "pc_sc_ratio", pc_sc_ratio);
#endif
    });
  } else if (pc_sc_ratio > 1.001 || pc_sc_ratio < 0.999) {
    LogDetail([pc_sc_ratio](AsyncDetail& detail) {
#if USE_NEW_LOGGING_FORMAT
      std::stringstream ss;
      ss << "PerfClock and system_clock differ by more than 0.1%! "
         << " pc_sc_ratio: " << pc_sc_ratio;
      MLPERF_LOG_WARNING(detail, "warning_generic_message", ss.str());
#else
      detail.Warning("PerfClock and system_clock differ by more than 0.1%. ",
                     "pc_sc_ratio", pc_sc_ratio);
#endif
    });
  }

  // Report latencies before `pr` is moved into the summary below.
  sut->ReportLatencyResults(pr.sample_latencies);

  PerformanceSummary perf_summary{sut->Name(), settings, std::move(pr)};
  LogSummary([perf_summary](AsyncSummary& summary) mutable {
    perf_summary.LogSummary(summary);
  });
  // Create a copy to prevent thread hazard between LogSummary and LogDetail.
  PerformanceSummary perf_summary_detail{perf_summary};
  LogDetail([perf_summary_detail](AsyncDetail& detail) mutable {
    perf_summary_detail.LogDetail(detail);
  });

  qsl->UnloadSamplesFromRam(performance_set.set);
}