in benchmarks/rnnt/ootb/inference/loadgen/loadgen.cc [704:845]
// Emits the human-readable MLPerf results summary: headline metric for the
// active scenario, validity verdict with recommendations, additional
// per-scenario stats, per-sample latency percentiles, and the test settings.
void PerformanceSummary::LogSummary(AsyncSummary& summary) {
  ProcessLatencies();

  // Banner.
  summary(
      "================================================\n"
      "MLPerf Results Summary\n"
      "================================================");
  summary("SUT name : ", sut_name);
  summary("Scenario : ", ToString(settings.scenario));
  summary("Mode : ", ToString(settings.mode));

  // Headline metric, one per scenario.
  const TestScenario scenario = settings.scenario;
  if (scenario == TestScenario::SingleStream) {
    summary(DoubleToString(target_latency_percentile.percentile * 100, 0) +
                "th percentile latency (ns) : ",
            target_latency_percentile.sample_latency);
  } else if (scenario == TestScenario::MultiStream) {
    summary("Samples per query : ", settings.samples_per_query);
  } else if (scenario == TestScenario::MultiStreamFree) {
    double free_mode_sps = pr.queries_issued * settings.samples_per_query /
                           pr.final_query_all_samples_done_time;
    summary("Samples per second : ", free_mode_sps);
  } else if (scenario == TestScenario::Server) {
    // Subtract 1 from sample count since the start of the final sample
    // represents the open end of the time range: i.e. [begin, end).
    // This makes sense since:
    // a) QPS doesn't apply if there's only one sample; it's pure latency.
    // b) If you have precisely 1k QPS, there will be a sample exactly on
    //    the 1 second time point; but that would be the 1001th sample in
    //    the stream. Given the first 1001 queries, the QPS is
    //    1000 queries / 1 second.
    double scheduled_qps = (sample_count - 1) / pr.final_query_scheduled_time;
    summary("Scheduled samples per second : ", DoubleToString(scheduled_qps));
  } else if (scenario == TestScenario::Offline) {
    double offline_sps = sample_count / pr.max_latency;
    summary("Samples per second: ", offline_sps);
  }

  // Validity verdict. The *Met() helpers may fill in a recommendation
  // string explaining how to fix a failed constraint.
  std::string min_duration_msg;
  std::string perf_constraints_msg;
  bool min_duration_met = MinDurationMet(&min_duration_msg);
  bool min_queries_met = MinQueriesMet() && MinSamplesMet();
  bool perf_constraints_met = PerfConstraintsMet(&perf_constraints_msg);
  bool valid = min_duration_met && min_queries_met && perf_constraints_met;
  summary("Result is : ", valid ? "VALID" : "INVALID");
  if (HasPerfConstraints()) {
    summary(" Performance constraints satisfied : ",
            perf_constraints_met ? "Yes" : "NO");
  }
  summary(" Min duration satisfied : ", min_duration_met ? "Yes" : "NO");
  summary(" Min queries satisfied : ", min_queries_met ? "Yes" : "NO");
  if (!valid) {
    summary("Recommendations:");
    if (!perf_constraints_met) {
      summary(" * " + perf_constraints_msg);
    }
    if (!min_duration_met) {
      summary(" * " + min_duration_msg);
    }
    if (!min_queries_met) {
      summary(
          " * The test exited early, before enough queries were issued.\n"
          " See the detailed log for why this may have occurred.");
    }
  }

  // Additional per-scenario statistics.
  summary(
      "\n"
      "================================================\n"
      "Additional Stats\n"
      "================================================");
  switch (settings.scenario) {
    case TestScenario::SingleStream: {
      double qps_with_lg = (sample_count - 1) / pr.final_query_issued_time;
      double qps_without_lg =
          1 / QuerySampleLatencyToSeconds(sample_latency_mean);
      summary("QPS w/ loadgen overhead : " + DoubleToString(qps_with_lg));
      summary("QPS w/o loadgen overhead : " + DoubleToString(qps_without_lg));
      summary("");
      break;
    }
    case TestScenario::Server: {
      double completed_qps =
          (sample_count - 1) / pr.final_query_all_samples_done_time;
      summary("Completed samples per second : ",
              DoubleToString(completed_qps));
      summary("");
      break;
    }
    case TestScenario::MultiStream:
    case TestScenario::MultiStreamFree: {
      // Inter-query interval stats, in both qps and ms.
      double interval_ms = std::milli::den / settings.target_qps;
      summary("Intervals between each IssueQuery: ", "qps", settings.target_qps,
              "ms", interval_ms);
      for (auto& p : latency_percentiles) {
        summary(DoubleToString(p.percentile * 100) + " percentile : ",
                p.query_intervals);
      }
      summary("");
      // Per-query latency target and observed percentiles.
      double target_latency_ns = settings.target_latency.count();
      double target_latency_ms =
          target_latency_ns * std::milli::den / std::nano::den;
      summary("Per-query latency: ", "target_ns",
              settings.target_latency.count(), "target_ms", target_latency_ms);
      for (auto& p : latency_percentiles) {
        summary(
            DoubleToString(p.percentile * 100) + " percentile latency (ns) : ",
            p.query_latency);
      }
      summary("");
      summary("Per-sample latency:");
      break;
    }
    default:
      // Offline has no scenario-specific extra stats.
      break;
  }

  // Per-sample latency distribution (all scenarios).
  summary("Min latency (ns) : ", sample_latency_min);
  summary("Max latency (ns) : ", sample_latency_max);
  summary("Mean latency (ns) : ", sample_latency_mean);
  for (auto& p : latency_percentiles) {
    summary(
        DoubleToString(p.percentile * 100) + " percentile latency (ns) : ",
        p.sample_latency);
  }

  // Footer: echo the settings this run used.
  summary(
      "\n"
      "================================================\n"
      "Test Parameters Used\n"
      "================================================");
  settings.LogSummary(summary);
}