// benchmarks/rnnt/ootb/inference/loadgen/loadgen.cc [847:952]
void PerformanceSummary::LogDetail(AsyncDetail& detail) {
#if USE_NEW_LOGGING_FORMAT
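// Derive min/max/mean and percentile latencies from the recorded
// samples before logging anything.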
ProcessLatencies();
// General validity checking
std::string min_duration_recommendation;
std::string perf_constraints_recommendation;
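// A run is VALID only if it met the minimum duration, issued the
// minimum number of queries/samples, and satisfied any
// scenario-specific performance constraints.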
bool min_duration_met = MinDurationMet(&min_duration_recommendation);
bool min_queries_met = MinQueriesMet() && MinSamplesMet();
bool perf_constraints_met =
PerfConstraintsMet(&perf_constraints_recommendation);
bool all_constraints_met =
min_duration_met && min_queries_met && perf_constraints_met;
MLPERF_LOG(detail, "result_validity",
all_constraints_met ? "VALID" : "INVALID");
if (HasPerfConstraints()) {
MLPERF_LOG(detail, "result_perf_constraints_met", perf_constraints_met);
}
MLPERF_LOG(detail, "result_min_duration_met", min_duration_met);
MLPERF_LOG(detail, "result_min_queries_met", min_queries_met);
if (!all_constraints_met) {
std::string recommendation;
if (!perf_constraints_met) {
recommendation += perf_constraints_recommendation + " ";
}
if (!min_duration_met) {
recommendation += min_duration_recommendation + " ";
}
if (!min_queries_met) {
recommendation +=
"The test exited early, before enough queries were issued.";
}
MLPERF_LOG(detail, "result_invalid_reason", recommendation);
}
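// Helper used by the multi-stream scenarios: logs per-query (as
// opposed to per-sample) latency percentiles, along with how many
// scheduling intervals each query spanned.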
auto reportPerQueryLatencies = [&]() {
for (auto& lp : latency_percentiles) {
std::string percentile = DoubleToString(lp.percentile * 100);
MLPERF_LOG(detail,
"result_" + percentile +
"_percentile_num_intervals_between_queries",
lp.query_intervals);
MLPERF_LOG(detail,
"result_" + percentile + "_percentile_per_query_latency_ns",
lp.query_latency);
}
};
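// E.g., for lp.percentile == 0.99 this emits
// "result_99_percentile_num_intervals_between_queries" and
// "result_99_percentile_per_query_latency_ns" (the exact numeric
// text depends on DoubleToString's formatting).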
// Per-scenario performance results.
switch (settings.scenario) {
case TestScenario::SingleStream: {
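// QPS with loadgen overhead is measured over wall-clock issue time
// (sample_count - 1 intervals; see the [begin, end) note in the
// Server case below). QPS without overhead is the reciprocal of
// the mean sample latency, excluding loadgen's issue gaps.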
double qps_w_lg = (sample_count - 1) / pr.final_query_issued_time;
double qps_wo_lg = 1 / QuerySampleLatencyToSeconds(sample_latency_mean);
MLPERF_LOG(detail, "result_qps_with_loadgen_overhead", qps_w_lg);
MLPERF_LOG(detail, "result_qps_without_loadgen_overhead", qps_wo_lg);
break;
}
case TestScenario::MultiStreamFree: {
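// Free-running multi-stream: throughput is every issued sample
// divided by the time at which the final query's samples completed.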
double samples_per_second = pr.queries_issued *
settings.samples_per_query /
pr.final_query_all_samples_done_time;
MLPERF_LOG(detail, "result_samples_per_second", samples_per_second);
reportPerQueryLatencies();
break;
}
case TestScenario::MultiStream: {
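// Fixed-interval multi-stream reports only per-query latencies;
// throughput is implied by the scenario's fixed issue cadence.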
reportPerQueryLatencies();
break;
}
case TestScenario::Server: {
// Subtract 1 from sample count since the start of the final sample
// represents the open end of the time range: i.e. [begin, end).
// This makes sense since:
// a) QPS doesn't apply if there's only one sample; it's pure latency.
// b) If you have precisely 1k QPS, there will be a sample exactly on
// the 1 second time point; but that would be the 1001st sample in
// the stream. Given the first 1001 queries, the QPS is
// 1000 queries / 1 second.
double qps_as_scheduled =
(sample_count - 1) / pr.final_query_scheduled_time;
MLPERF_LOG(detail, "result_scheduled_samples_per_sec", qps_as_scheduled);
double qps_as_completed =
(sample_count - 1) / pr.final_query_all_samples_done_time;
MLPERF_LOG(detail, "result_completed_samples_per_sec", qps_as_completed);
break;
}
case TestScenario::Offline: {
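// Offline issues all samples up front, so the longest sample
// latency spans the whole run and serves as the denominator.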
double samples_per_second = sample_count / pr.max_latency;
MLPERF_LOG(detail, "result_samples_per_second", samples_per_second);
break;
}
}
// Detailed latencies
MLPERF_LOG(detail, "result_min_latency_ns", sample_latency_min);
MLPERF_LOG(detail, "result_max_latency_ns", sample_latency_max);
MLPERF_LOG(detail, "result_mean_latency_ns", sample_latency_mean);
for (auto& lp : latency_percentiles) {
MLPERF_LOG(detail,
"result_" + DoubleToString(lp.percentile * 100) +
"_percentile_latency_ns",
lp.sample_latency);
}
#endif
}