in benchmarks/rnnt/ootb/inference/loadgen/loadgen.cc [1290:1436]
void FindPeakPerformanceMode(SystemUnderTest* sut, QuerySampleLibrary* qsl,
                             const TestSettingsInternal& base_settings,
                             SequenceGen* sequence_gen) {
  // Searches for the peak-performance settings, starting from |base_settings|
  // as the initial lower bound:
  //   1. Run a performance-only test with |base_settings| and check that it
  //      meets the performance constraints.
  //   2. Derive an upper bound from that lower bound (FindBoundaries).
  //   3. Binary-search between the two bounds
  //      (FindPeakPerformanceBinarySearch) and log/report the result.
  //
  // sut:           system under test; receives the queries and the final
  //                latency results.
  // qsl:           sample library whose samples are loaded into / unloaded
  //                from RAM around each measurement.
  // base_settings: settings used as the initial lower bound of the search.
  // sequence_gen:  forwarded unchanged to the query-issuing helpers.
  //
  // NOTE(review): `scenario` is not declared inside this function. It is used
  // as a template argument below, so it is presumably a template parameter
  // declared immediately above this definition - confirm.
  LogDetail([](AsyncDetail& detail) {
#if USE_NEW_LOGGING_FORMAT
    MLPERF_LOG(detail, "generic_message", "Starting FindPeakPerformance mode");
#else
    detail("Starting FindPeakPerformance mode:");
#endif
  });

  // Only MultiStream, MultiStreamFree and Server are accepted; anything else
  // is logged as an error and the search is skipped.
  if (scenario != TestScenario::MultiStream &&
      scenario != TestScenario::MultiStreamFree &&
      scenario != TestScenario::Server) {
    // The logged message is a fixed constant, so the closure needs no
    // captures.
    LogDetail([](AsyncDetail& detail) {
#if USE_NEW_LOGGING_FORMAT
      MLPERF_LOG_ERROR(detail, "error_invalid_config",
                       find_peak_performance::kNotSupportedMsg);
#else
      detail.Error(find_peak_performance::kNotSupportedMsg);
#endif
    });
    return;
  }

  LogDetail(
      [base_field = find_peak_performance::ToStringPerformanceField<scenario>(
           base_settings)](AsyncDetail& detail) {
#if USE_NEW_LOGGING_FORMAT
        MLPERF_LOG(
            detail, "generic_message",
            "FindPeakPerformance: Check validity of the base settings field: " +
                base_field);
#else
        detail(
            "FindPeakPerformance: Check validity of the base settings field: " +
            base_field);
#endif
      });

  // 1. Check whether the user-supplied lower bound satisfies the performance
  //    constraints.
  std::vector<loadgen::LoadableSampleSet> base_loadable_sets(
      loadgen::GenerateLoadableSets(qsl, base_settings));
  const LoadableSampleSet& base_performance_set = base_loadable_sets.front();
  LoadSamplesToRam(qsl, base_performance_set.set);

  PerformanceResult base_pr(IssueQueries<scenario, TestMode::PerformanceOnly>(
      sut, base_settings, base_performance_set, sequence_gen));
  PerformanceSummary base_perf_summary{sut->Name(), base_settings,
                                       std::move(base_pr)};

  // We can also use all_constraints_met to check performance constraints,
  // but to reduce searching time, it is left to the user to make sure the
  // settings satisfy min duration & min queries.
  std::string msg;
  if (!base_perf_summary.PerfConstraintsMet(&msg)) {
    LogDetail([msg](AsyncDetail& detail) {
#if USE_NEW_LOGGING_FORMAT
      std::stringstream ss;
      ss << "FindPeakPerformance: Initial lower bound does not satisfy "
         << "performance constraints, msg: " << msg;
      MLPERF_LOG_ERROR(detail, "error_runtime", ss.str());
#else
      detail.Error(
          "FindPeakPerformance: Initial lower bound does not satisfy "
          "performance constraints, msg: " +
          msg);
#endif
    });

    // The lower bound itself failed: report and log the base run as the
    // result, release its samples, and stop.
    sut->ReportLatencyResults(base_perf_summary.pr.sample_latencies);

    // Build a new summary object around the base run's results.
    PerformanceSummary perf_summary{sut->Name(), base_settings,
                                    std::move(base_perf_summary.pr)};
    LogSummary([perf_summary](AsyncSummary& summary) mutable {
      perf_summary.LogSummary(summary);
    });
    // Each logging closure owns its own PerformanceSummary to prevent a
    // thread hazard between LogSummary and LogDetail. perf_summary is not
    // used again, so it is moved into this closure instead of being copied.
    LogDetail([perf_summary_detail = std::move(perf_summary)](
                  AsyncDetail& detail) mutable {
      perf_summary_detail.LogDetail(detail);
    });

    qsl->UnloadSamplesFromRam(base_performance_set.set);
    return;
  }

  // Clear loaded samples.
  qsl->UnloadSamplesFromRam(base_performance_set.set);

  // 2. Find an upper bound based on the lower bound.
  std::pair<PerformanceSummary, PerformanceSummary> boundaries =
      FindBoundaries<scenario>(sut, qsl, sequence_gen, base_perf_summary);
  // Refer to the pair's elements directly; |boundaries| outlives every use,
  // so copying the summaries out of it is unnecessary.
  PerformanceSummary& l_perf_summary = boundaries.first;
  PerformanceSummary& u_perf_summary = boundaries.second;

  LogDetail(
      [l_field = find_peak_performance::ToStringPerformanceField<scenario>(
           l_perf_summary.settings),
       u_field = find_peak_performance::ToStringPerformanceField<scenario>(
           u_perf_summary.settings)](AsyncDetail& detail) {
#if USE_NEW_LOGGING_FORMAT
        MLPERF_LOG(detail, "generic_message",
                   "FindPeakPerformance: Found boundaries: [" + l_field + ", " +
                       u_field + ")");
#else
        detail("FindPeakPerformance: Found boundaries: [" + l_field + ", " +
               u_field + ")");
#endif
      });

  // Reuse performance_set, u_perf_summary has the largest 'samples_per_query'.
  std::vector<loadgen::LoadableSampleSet> loadable_sets(
      loadgen::GenerateLoadableSets(qsl, u_perf_summary.settings));
  const LoadableSampleSet& performance_set = loadable_sets.front();
  LoadSamplesToRam(qsl, performance_set.set);

  // 3. Find peak performance settings using the found boundaries.
  PerformanceSummary perf_summary = FindPeakPerformanceBinarySearch<scenario>(
      sut, qsl, sequence_gen, performance_set, l_perf_summary, u_perf_summary);

  // Print out the peak performance test setting.
  LogDetail([field = find_peak_performance::ToStringPerformanceField<scenario>(
                 perf_summary.settings)](AsyncDetail& detail) {
#if USE_NEW_LOGGING_FORMAT
    MLPERF_LOG(detail, "generic_message",
               "FindPeakPerformance: Found peak performance field: " + field);
#else
    detail("FindPeakPerformance: Found peak performance field: " + field);
#endif
  });

  sut->ReportLatencyResults(perf_summary.pr.sample_latencies);
  LogSummary([perf_summary](AsyncSummary& summary) mutable {
    perf_summary.LogSummary(summary);
  });
  // Each logging closure owns its own PerformanceSummary to prevent a thread
  // hazard between LogSummary and LogDetail. perf_summary is not used again,
  // so it is moved into this closure instead of being copied.
  LogDetail([perf_summary_detail = std::move(perf_summary)](
                AsyncDetail& detail) mutable {
    perf_summary_detail.LogDetail(detail);
  });

  qsl->UnloadSamplesFromRam(performance_set.set);
}