void FindPeakPerformanceMode()

in benchmarks/rnnt/ootb/inference/loadgen/loadgen.cc [1290:1436]


void FindPeakPerformanceMode(SystemUnderTest* sut, QuerySampleLibrary* qsl,
                             const TestSettingsInternal& base_settings,
                             SequenceGen* sequence_gen) {
  // Drives the FindPeakPerformance test mode:
  //   0. Bail out (with an error log) unless the scenario is MultiStream,
  //      MultiStreamFree or Server.
  //   1. Run the caller-supplied base settings once in PerformanceOnly mode.
  //      If they do not meet the performance constraints, log the error,
  //      report/log that run's results and return.
  //   2. Derive [lower, upper) boundaries from the base run (FindBoundaries).
  //   3. Search between the boundaries (FindPeakPerformanceBinarySearch),
  //      then report latencies to the SUT and log the summary and detail.
  //
  // sut:           system under test; receives the queries and the final
  //                latency results.
  // qsl:           sample library; samples are loaded before and unloaded
  //                after each phase that issues queries from this function.
  // base_settings: user-provided settings used as the initial lower bound.
  // sequence_gen:  forwarded unchanged to the query-issuing helpers.
  //
  // NOTE(review): `scenario` is not declared in this excerpt. It is used as a
  // template argument below, so it is presumably a template parameter declared
  // immediately above this definition -- confirm.
  LogDetail([](AsyncDetail& detail) {
#if USE_NEW_LOGGING_FORMAT
    MLPERF_LOG(detail, "generic_message", "Starting FindPeakPerformance mode");
#else
    detail("Starting FindPeakPerformance mode:");
#endif
  });

  if (scenario != TestScenario::MultiStream &&
      scenario != TestScenario::MultiStreamFree &&
      scenario != TestScenario::Server) {
    // The logged message is a fixed constant, so the lambda needs no captures.
    LogDetail([](AsyncDetail& detail) {
#if USE_NEW_LOGGING_FORMAT
      MLPERF_LOG_ERROR(detail, "error_invalid_config",
                       find_peak_performance::kNotSupportedMsg);
#else
      detail.Error(find_peak_performance::kNotSupportedMsg);
#endif
    });
    return;
  }

  LogDetail(
      [base_field = find_peak_performance::ToStringPerformanceField<scenario>(
           base_settings)](AsyncDetail& detail) {
#if USE_NEW_LOGGING_FORMAT
        MLPERF_LOG(
            detail, "generic_message",
            "FindPeakPerformance: Check validity of the base settings field: " +
                base_field);
#else
        detail(
            "FindPeakPerformance: Check validity of the base settings field: " +
            base_field);
#endif
      });

  // 1. Check whether the user-provided lower bound satisfies the performance
  // constraints.
  // NOTE(review): front() assumes GenerateLoadableSets returns at least one
  // set -- confirm against its implementation.
  std::vector<loadgen::LoadableSampleSet> base_loadable_sets(
      loadgen::GenerateLoadableSets(qsl, base_settings));
  const LoadableSampleSet& base_performance_set = base_loadable_sets.front();
  LoadSamplesToRam(qsl, base_performance_set.set);

  PerformanceResult base_pr(IssueQueries<scenario, TestMode::PerformanceOnly>(
      sut, base_settings, base_performance_set, sequence_gen));
  PerformanceSummary base_perf_summary{sut->Name(), base_settings,
                                       std::move(base_pr)};

  // We could also use all_constraints_met to check performance constraints,
  // but to reduce searching time it is left to the user to make sure the
  // settings satisfy min duration & min queries.
  std::string msg;
  if (!base_perf_summary.PerfConstraintsMet(&msg)) {
    LogDetail([msg](AsyncDetail& detail) {
#if USE_NEW_LOGGING_FORMAT
      std::stringstream ss;
      ss << "FindPeakPerformance: Initial lower bound does not satisfy "
         << "performance constraints, msg: " << msg;
      MLPERF_LOG_ERROR(detail, "error_runtime", ss.str());
#else
      detail.Error(
          "FindPeakPerformance: Initial lower bound does not satisfy "
          "performance constraints, msg: " +
          msg);
#endif
    });

    // Report before the result is moved out of base_perf_summary below.
    sut->ReportLatencyResults(base_perf_summary.pr.sample_latencies);

    PerformanceSummary perf_summary{sut->Name(), base_settings,
                                    std::move(base_perf_summary.pr)};
    // Each logging lambda owns its own PerformanceSummary to prevent a thread
    // hazard between LogSummary and LogDetail.
    LogSummary([perf_summary](AsyncSummary& summary) mutable {
      perf_summary.LogSummary(summary);
    });
    // perf_summary is not used past this point, so hand it over by move
    // instead of making further copies.
    LogDetail([perf_summary_detail = std::move(perf_summary)](
                  AsyncDetail& detail) mutable {
      perf_summary_detail.LogDetail(detail);
    });

    qsl->UnloadSamplesFromRam(base_performance_set.set);

    return;
  }

  // Clear loaded samples.
  qsl->UnloadSamplesFromRam(base_performance_set.set);

  // 2. Find an upper bound based on the lower bound.
  std::pair<PerformanceSummary, PerformanceSummary> boundaries =
      FindBoundaries<scenario>(sut, qsl, sequence_gen, base_perf_summary);
  // `boundaries` is not read again, so move its halves out rather than
  // copying the summaries (and the results they carry).
  PerformanceSummary l_perf_summary = std::move(boundaries.first);
  PerformanceSummary u_perf_summary = std::move(boundaries.second);

  LogDetail(
      [l_field = find_peak_performance::ToStringPerformanceField<scenario>(
           l_perf_summary.settings),
       u_field = find_peak_performance::ToStringPerformanceField<scenario>(
           u_perf_summary.settings)](AsyncDetail& detail) {
#if USE_NEW_LOGGING_FORMAT
        MLPERF_LOG(detail, "generic_message",
                   "FindPeakPerformance: Found boundaries: [" + l_field + ", " +
                       u_field + ")");
#else
        detail("FindPeakPerformance: Found boundaries: [" + l_field + ", " +
               u_field + ")");
#endif
      });

  // Reuse performance_set, u_perf_summary has the largest 'samples_per_query'.
  // NOTE(review): front() assumes at least one loadable set -- confirm.
  std::vector<loadgen::LoadableSampleSet> loadable_sets(
      loadgen::GenerateLoadableSets(qsl, u_perf_summary.settings));
  const LoadableSampleSet& performance_set = loadable_sets.front();
  LoadSamplesToRam(qsl, performance_set.set);

  // 3. Find peak performance settings using the found boundaries.
  PerformanceSummary perf_summary = FindPeakPerformanceBinarySearch<scenario>(
      sut, qsl, sequence_gen, performance_set, l_perf_summary, u_perf_summary);

  // Print out the peak performance test setting.
  LogDetail([field = find_peak_performance::ToStringPerformanceField<scenario>(
                 perf_summary.settings)](AsyncDetail& detail) {
#if USE_NEW_LOGGING_FORMAT
    MLPERF_LOG(detail, "generic_message",
               "FindPeakPerformance: Found peak performance field: " + field);
#else
    detail("FindPeakPerformance: Found peak performance field: " + field);
#endif
  });

  sut->ReportLatencyResults(perf_summary.pr.sample_latencies);

  // Each logging lambda owns its own PerformanceSummary to prevent a thread
  // hazard between LogSummary and LogDetail.
  LogSummary([perf_summary](AsyncSummary& summary) mutable {
    perf_summary.LogSummary(summary);
  });
  // Last use of perf_summary: move it into the detail logger instead of
  // copying it again.
  LogDetail([perf_summary_detail = std::move(perf_summary)](
                AsyncDetail& detail) mutable {
    perf_summary_detail.LogDetail(detail);
  });

  qsl->UnloadSamplesFromRam(performance_set.set);
}