int main()

in benchmarks/rnnt/ootb/inference/loadgen/benchmark/repro.cpp [214:302]


// Entry point of the Server-scenario repro benchmark.
//
// Positional command-line arguments (each parsed with std::stoi):
//   argv[1] target_qps               required; assigned to
//                                    testSettings.server_target_qps
//   argv[2] useQueue                 optional, default 0; non-zero selects the
//                                    queue-based SUT
//   argv[3] numCompleteThreads       optional, default 4; forwarded to QueueSUT
//   argv[4] maxSize                  optional, default 1; forwarded to QueueSUT
//   argv[5] server_coalesce_queries  optional, default 0; non-zero enables it
//   argv[6] num_issue_threads        optional, default 0; non-zero selects a
//                                    Multi* SUT
//
// Returns 0 after the test has run and the SUT has been destroyed, or 1 when
// target_qps is missing or the not-yet-implemented MultiQueueSUT combination
// (useQueue != 0 together with num_issue_threads != 0) is requested.
//
// NOTE: a non-numeric or out-of-range argument makes std::stoi throw; that
// exception is not caught here.
int main(int argc, char** argv) {
  // Validate with a real runtime check rather than assert(): assert() is
  // compiled out when NDEBUG is defined, which would let a missing argument
  // reach std::stoi(argv[1]) with a null pointer.
  if (argc < 2) {
    std::cerr << "Need to pass in at least one argument: target_qps"
              << std::endl;
    std::cerr << "Usage: " << (argc > 0 ? argv[0] : "repro")
              << " target_qps [useQueue] [numCompleteThreads] [maxSize]"
                 " [server_coalesce_queries] [num_issue_threads]"
              << std::endl;
    return 1;
  }
  int target_qps = std::stoi(argv[1]);
  std::cout << "target_qps = " << target_qps << std::endl;

  // Defaults for the optional arguments; each is overridden only when the
  // corresponding positional argument is present.
  bool useQueue{false};
  int numCompleteThreads{4};
  int maxSize{1};
  bool server_coalesce_queries{false};
  int num_issue_threads{0};
  if (argc >= 3) {
    useQueue = std::stoi(argv[2]) != 0;
  }
  if (argc >= 4) {
    numCompleteThreads = std::stoi(argv[3]);
  }
  if (argc >= 5) {
    maxSize = std::stoi(argv[4]);
  }
  if (argc >= 6) {
    server_coalesce_queries = std::stoi(argv[5]) != 0;
  }
  if (argc >= 7) {
    num_issue_threads = std::stoi(argv[6]);
  }

  QSL qsl;
  std::unique_ptr<mlperf::SystemUnderTest> sut;

  // Configure the test settings
  mlperf::TestSettings testSettings;
  testSettings.scenario = mlperf::TestScenario::Server;
  testSettings.mode = mlperf::TestMode::PerformanceOnly;
  testSettings.server_target_qps = target_qps;
  testSettings.server_target_latency_ns = 10000000;  // 10ms
  testSettings.server_target_latency_percentile = 0.99;
  testSettings.min_duration_ms = 60000;
  testSettings.min_query_count = 270000;
  testSettings.server_coalesce_queries = server_coalesce_queries;
  std::cout << "testSettings.server_coalesce_queries = "
            << (server_coalesce_queries ? "True" : "False") << std::endl;
  testSettings.server_num_issue_query_threads = num_issue_threads;
  std::cout << "num_issue_threads = " << num_issue_threads << std::endl;

  // Configure the logging settings
  mlperf::LogSettings logSettings;
  logSettings.log_output.outdir = "build";
  logSettings.log_output.prefix = "mlperf_log_";
  logSettings.log_output.suffix = "";
  logSettings.log_output.prefix_with_datetime = false;
  logSettings.log_output.copy_detail_to_stdout = false;
  logSettings.log_output.copy_summary_to_stdout = true;
  logSettings.log_mode = mlperf::LoggingMode::AsyncPoll;
  logSettings.log_mode_async_poll_interval_ms = 1000;
  logSettings.enable_trace = false;

  // Choose SUT: num_issue_threads == 0 picks the single-issue-thread variants,
  // anything else picks the Multi* variants.
  if (num_issue_threads == 0) {
    if (useQueue) {
      std::cout << "Using QueueSUT with " << numCompleteThreads
                << " complete threads" << std::endl;
      sut.reset(new QueueSUT(numCompleteThreads, maxSize));
    } else {
      std::cout << "Using BasicSUT" << std::endl;
      sut.reset(new BasicSUT());
    }
  } else {
    if (useQueue) {
      std::cout << "Using MultiQueueSUT with " << numCompleteThreads
                << " complete threads" << std::endl;
      std::cerr << "!!!! MultiQueueSUT is NOT implemented yet !!!!"
                << std::endl;
      return 1;
      // sut.reset(new MultiQueueSUT(num_issue_threads, numCompleteThreads,
      // maxSize));
    } else {
      std::cout << "Using MultiBasicSUT" << std::endl;
      sut.reset(new MultiBasicSUT(num_issue_threads));
    }
  }

  // Start test
  std::cout << "Start test..." << std::endl;
  mlperf::StartTest(sut.get(), &qsl, testSettings, logSettings);
  std::cout << "Test done. Clean up SUT..." << std::endl;
  // Destroy the SUT explicitly so its teardown happens between the two
  // progress messages rather than implicitly at scope exit.
  sut.reset();
  std::cout << "Done!" << std::endl;
  return 0;
}