void PerformanceSummary::LogDetail(AsyncDetail& detail)

in benchmarks/rnnt/ootb/inference/loadgen/loadgen.cc [847:952]
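
Logs the detailed performance summary for a completed run via the structured
MLPERF_LOG facility: overall result validity and the individual constraint
checks behind it, per-scenario throughput metrics, and sample latency
statistics. The body is compiled only when USE_NEW_LOGGING_FORMAT is defined;
otherwise the function is a no-op.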


void PerformanceSummary::LogDetail(AsyncDetail& detail) {
#if USE_NEW_LOGGING_FORMAT
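  // Populate the latency statistics (sample_latency_min/max/mean and
  // latency_percentiles) referenced throughout the rest of this function.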
  ProcessLatencies();

  // General validity checking
  std::string min_duration_recommendation;
  std::string perf_constraints_recommendation;
  bool min_duration_met = MinDurationMet(&min_duration_recommendation);
  bool min_queries_met = MinQueriesMet() && MinSamplesMet();
  bool perf_constraints_met =
      PerfConstraintsMet(&perf_constraints_recommendation);
  bool all_constraints_met =
      min_duration_met && min_queries_met && perf_constraints_met;

  MLPERF_LOG(detail, "result_validity",
             all_constraints_met ? "VALID" : "INVALID");
  if (HasPerfConstraints()) {
    MLPERF_LOG(detail, "result_perf_constraints_met", perf_constraints_met);
  }
  MLPERF_LOG(detail, "result_min_duration_met", min_duration_met);
  MLPERF_LOG(detail, "result_min_queries_met", min_queries_met);
  if (!all_constraints_met) {
    std::string recommendation;
    if (!perf_constraints_met) {
      recommendation += perf_constraints_recommendation + " ";
    }
    if (!min_duration_met) {
      recommendation += min_duration_recommendation + " ";
    }
    if (!min_queries_met) {
      recommendation +=
          "The test exited early, before enough queries were issued.";
    }
    MLPERF_LOG(detail, "result_invalid_reason", recommendation);
  }

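  // Both MultiStream variants also report per-query (as opposed to
  // per-sample) results at each target percentile: the per-query latency and
  // the number of scheduling intervals between queries.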
  auto reportPerQueryLatencies = [&]() {
    for (auto& lp : latency_percentiles) {
      std::string percentile = DoubleToString(lp.percentile * 100);
      MLPERF_LOG(detail,
                 "result_" + percentile +
                     "_percentile_num_intervals_between_queries",
                 lp.query_intervals);
      MLPERF_LOG(detail,
                 "result_" + percentile + "_percentile_per_query_latency_ns",
                 lp.query_latency);
    }
  };

  // Per-scenario performance results.
  switch (settings.scenario) {
    case TestScenario::SingleStream: {
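      // QPS "with loadgen overhead" measures wall-clock time between query
      // issues; QPS "without overhead" is derived purely from the mean
      // sample latency.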
      double qps_w_lg = (sample_count - 1) / pr.final_query_issued_time;
      double qps_wo_lg = 1 / QuerySampleLatencyToSeconds(sample_latency_mean);
      MLPERF_LOG(detail, "result_qps_with_loadgen_overhead", qps_w_lg);
      MLPERF_LOG(detail, "result_qps_without_loadgen_overhead", qps_wo_lg);
      break;
    }
    case TestScenario::MultiStreamFree: {
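      // Overall throughput: each issued query contributes
      // settings.samples_per_query samples, measured over the time until
      // the final query's samples complete.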
      double samples_per_second = pr.queries_issued *
                                  settings.samples_per_query /
                                  pr.final_query_all_samples_done_time;
      MLPERF_LOG(detail, "result_samples_per_second", samples_per_second);
      reportPerQueryLatencies();
      break;
    }
    case TestScenario::MultiStream: {
      reportPerQueryLatencies();
      break;
    }
    case TestScenario::Server: {
      // Subtract 1 from sample count since the start of the final sample
      // represents the open end of the time range: i.e. [begin, end).
      // This makes sense since:
      // a) QPS doesn't apply if there's only one sample; it's pure latency.
      // b) If you have precisely 1k QPS, there will be a sample exactly on
      //    the 1 second time point; but that would be the 1001st sample in
      //    the stream. Given the first 1001 queries, the QPS is
      //    1000 queries / 1 second.
      double qps_as_scheduled =
          (sample_count - 1) / pr.final_query_scheduled_time;
      MLPERF_LOG(detail, "result_scheduled_samples_per_sec", qps_as_scheduled);
      double qps_as_completed =
          (sample_count - 1) / pr.final_query_all_samples_done_time;
      MLPERF_LOG(detail, "result_completed_samples_per_sec", qps_as_completed);
      break;
    }
    case TestScenario::Offline: {
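      // All samples are issued at the start of an Offline run, so the
      // maximum latency doubles as the effective duration of the test.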
      double samples_per_second = sample_count / pr.max_latency;
      MLPERF_LOG(detail, "result_samples_per_second", samples_per_second);
      break;
    }
  }

  // Detailed latencies
  MLPERF_LOG(detail, "result_min_latency_ns", sample_latency_min);
  MLPERF_LOG(detail, "result_max_latency_ns", sample_latency_max);
  MLPERF_LOG(detail, "result_mean_latency_ns", sample_latency_mean);
  for (auto& lp : latency_percentiles) {
    MLPERF_LOG(detail,
               "result_" + DoubleToString(lp.percentile * 100) +
                   "_percentile_latency_ns",
               lp.sample_latency);
  }
#endif
}
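
For reference, here is a minimal, self-contained sketch of the percentile
key-construction pattern used above. FormatPercentile is a hypothetical
stand-in for loadgen's DoubleToString, and the percentile list is
illustrative rather than the one any scenario actually configures.

#include <iostream>
#include <sstream>
#include <string>

// Hypothetical stand-in for loadgen's DoubleToString: formats a double
// without trailing zeros (e.g. 90 -> "90", 99.9 -> "99.9").
std::string FormatPercentile(double value) {
  std::ostringstream os;
  os << value;
  return os.str();
}

int main() {
  // Illustrative percentiles; the real list comes from latency_percentiles.
  const double percentiles[] = {0.50, 0.90, 0.97, 0.99, 0.999};
  for (double p : percentiles) {
    std::string key =
        "result_" + FormatPercentile(p * 100) + "_percentile_latency_ns";
    std::cout << key << "\n";  // e.g. result_99.9_percentile_latency_ns
  }
  return 0;
}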