void RunPerformanceMode()

in benchmarks/rnnt/ootb/inference/loadgen/loadgen.cc [1183:1278]


// Runs the performance-only test mode against `sut`.
//
// In order, this function:
//   1. Generates the loadable sample sets and loads the first one into RAM.
//   2. Issues queries via IssueQueries<scenario, TestMode::PerformanceOnly>,
//      bracketing the call with PerfClock and std::chrono::system_clock
//      timestamps.
//   3. Compares the two measured durations (truncated to milliseconds) and
//      logs an error if they differ by more than 1%, or a warning if they
//      differ by more than 0.1%.
//   4. Passes the sample latencies to sut->ReportLatencyResults(), logs the
//      performance summary and detail, and unloads the samples from RAM.
//
// Parameters:
//   sut          - system under test; receives the queries and the latency
//                  report.
//   qsl          - query sample library used to load/unload the samples.
//   settings     - test settings; `print_timestamps` additionally enables the
//                  timestamp/duration output on std::cout.
//   sequence_gen - forwarded unchanged to IssueQueries.
//
// NOTE(review): `scenario` is not declared anywhere in this block; presumably
// it is a template parameter declared on a line just above - confirm.
void RunPerformanceMode(SystemUnderTest* sut, QuerySampleLibrary* qsl,
                        const TestSettingsInternal& settings,
                        SequenceGen* sequence_gen) {
  LogDetail([](AsyncDetail& detail) {
#if USE_NEW_LOGGING_FORMAT
    MLPERF_LOG(detail, "generic_message", "Starting performance mode");
#else
    detail("Starting performance mode:");
#endif
  });

  // Use first loadable set as the performance set.
  // NOTE(review): front() is undefined on an empty vector; this assumes
  // GenerateLoadableSets always returns at least one set - confirm.
  std::vector<loadgen::LoadableSampleSet> loadable_sets(
      loadgen::GenerateLoadableSets(qsl, settings));
  const LoadableSampleSet& performance_set = loadable_sets.front();
  LoadSamplesToRam(qsl, performance_set.set);

  // Start PerfClock/system_clock timers for measuring performance interval
  // for comparison vs external timer.
  auto pc_start_ts = PerfClock::now();
  auto sc_start_ts = std::chrono::system_clock::now();
  if (settings.print_timestamps) {
    std::cout << "Loadgen :: Perf mode start. system_clock Timestamp = "
              << std::chrono::system_clock::to_time_t(sc_start_ts) << "\n"
              << std::flush;
  }

  PerformanceResult pr(IssueQueries<scenario, TestMode::PerformanceOnly>(
      sut, settings, performance_set, sequence_gen));

  // Measure PerfClock/system_clock timer durations for comparison vs
  // external timer.
  auto pc_stop_ts = PerfClock::now();
  auto sc_stop_ts = std::chrono::system_clock::now();
  auto pc_duration = std::chrono::duration_cast<std::chrono::milliseconds>(
                         pc_stop_ts - pc_start_ts)
                         .count();
  auto sc_duration = std::chrono::duration_cast<std::chrono::milliseconds>(
                         sc_stop_ts - sc_start_ts)
                         .count();
  // NOTE(review): both durations are truncated to whole milliseconds, so a
  // run shorter than 1 ms of system_clock time makes sc_duration 0 and this
  // a division by zero - confirm that cannot happen in practice.
  float pc_sc_ratio = static_cast<float>(pc_duration) / sc_duration;
  if (settings.print_timestamps) {
    std::cout << "Loadgen :: Perf mode stop. system_clock Timestamp = "
              << std::chrono::system_clock::to_time_t(sc_stop_ts) << "\n"
              << std::flush;
    std::cout << "Loadgen :: PerfClock Perf duration = " << pc_duration
              << "ms\n"
              << std::flush;
    std::cout << "Loadgen :: system_clock Perf duration = " << sc_duration
              << "ms\n"
              << std::flush;
    std::cout << "Loadgen :: PerfClock/system_clock ratio = " << std::fixed
              << std::setprecision(4) << pc_sc_ratio << "\n"
              << std::flush;
  }

  // Clock sanity check: more than 1% disagreement is an error, more than 0.1%
  // is only a warning.
  if (pc_sc_ratio > 1.01 || pc_sc_ratio < 0.99) {
    LogDetail([pc_sc_ratio](AsyncDetail& detail) {
#if USE_NEW_LOGGING_FORMAT
      std::stringstream ss;
      ss << "PerfClock and system_clock differ by more than 1%! "
         << " pc_sc_ratio: " << pc_sc_ratio;
      MLPERF_LOG_ERROR(detail, "error_runtime", ss.str());
#else
      // '%' needs no escaping in a C++ string literal ("\%" is not a
      // standard escape sequence).
      detail.Error("PerfClock and system_clock differ by more than 1%! ",
                   "pc_sc_ratio", pc_sc_ratio);
#endif
    });
  } else if (pc_sc_ratio > 1.001 || pc_sc_ratio < 0.999) {
    LogDetail([pc_sc_ratio](AsyncDetail& detail) {
#if USE_NEW_LOGGING_FORMAT
      std::stringstream ss;
      ss << "PerfClock and system_clock differ by more than 0.1%! "
         << " pc_sc_ratio: " << pc_sc_ratio;
      MLPERF_LOG_WARNING(detail, "warning_generic_message", ss.str());
#else
      detail.Warning("PerfClock and system_clock differ by more than 0.1%. ",
                     "pc_sc_ratio", pc_sc_ratio);
#endif
    });
  }

  sut->ReportLatencyResults(pr.sample_latencies);

  PerformanceSummary perf_summary{sut->Name(), settings, std::move(pr)};
  LogSummary([perf_summary](AsyncSummary& summary) mutable {
    perf_summary.LogSummary(summary);
  });
  // The summary lambda above captured its own copy, and the detail lambda
  // below captures another, so LogSummary and LogDetail never share an
  // object. `perf_summary` itself is not used again, so it is moved here
  // instead of being copied once more.
  PerformanceSummary perf_summary_detail{std::move(perf_summary)};
  LogDetail([perf_summary_detail](AsyncDetail& detail) mutable {
    perf_summary_detail.LogDetail(detail);
  });

  qsl->UnloadSamplesFromRam(performance_set.set);
}