void runBench(Stats& global)

in cachelib/benchmarks/MutexBench.cpp [244:365]


void runBench(Stats& global) {
  // mutex for our lru
  Mutex mutex;
  std::vector<std::thread> threads;

  // per-thread stats, one entry per benchmark thread.
  std::vector<Stats> stats(FLAGS_num_threads);

  // number of threads that have completed their benchmark loop
  std::atomic<unsigned int> nCompleted{0};

  // main thread will wait on this to figure out when the benchmark is
  // complete
  std::mutex benchFinishMutex;
  bool benchFinished = false;
  std::condition_variable benchFinishCV;

  // all benchmark threads will wait on this after completing the benchmark
  // and before doing the thread cleanup.
  bool cleanUpStart = false;
  std::condition_variable cleanupCV;
  std::mutex cleanupMutex;

  auto runInThread = [&](unsigned int index) {
    auto rng = folly::ThreadLocalPRNG();
    std::mt19937 gen(folly::Random::rand32(rng));
    std::uniform_real_distribution<> opDis(0, 1);
    auto& s = stats[index];

    const auto now = util::getCurrentTimeNs();

    using LockHolder = std::unique_lock<Mutex>;
    for (size_t i = 0; i < FLAGS_num_thread_ops; i++) {
      const auto r = opDis(gen);
      const uint32_t currTime = util::getCurrentTimeSec();

      if (r < gLoadInfo.updateRatio) {
        const auto idx = gLru->getRandomNodeIdx(rng);
        ++s.numUpdates;
        LockHolder l(mutex);
        gLru->doUpdate(idx, currTime);
      }

      if (r < gLoadInfo.evictRatio) {
        const auto searchLen = folly::Random::rand32(FLAGS_max_search_len, rng);
        ++s.numEvicts;
        LockHolder l(mutex);
        gLru->doEvict(searchLen, currTime);
      }

      if (r < gLoadInfo.deleteRatio) {
        ++s.numEvicts;
        LockHolder l(mutex);
        // for now, deletes also do evicts
        gLru->doEvict(1, currTime);
      }

      if (FLAGS_spin_nano) {
        // use a post-decrement check so values below 40 cannot underflow
        unsigned int spins = FLAGS_spin_nano / 40;
        while (spins-- > 0) {
          // this pause takes up to 40 clock cycles on intel and the lock
          // cmpxchgl above should take about 100 clock cycles. we pause once
          // every 400 cycles or so if we are extremely unlucky.
          folly::asm_volatile_pause();
        }
      }

      if (FLAGS_sleep_nano) {
        /* sleep override */ std::this_thread::sleep_for(
            std::chrono::nanoseconds(FLAGS_sleep_nano));
      }
    }

    s.elapsedNSecs += (util::getCurrentTimeNs() - now);

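    // the last thread to finish marks the benchmark complete and wakes the
    // main thread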
    if (++nCompleted == FLAGS_num_threads) {
      {
        std::unique_lock<std::mutex> l(benchFinishMutex);
        benchFinished = true;
      }
      benchFinishCV.notify_one();
    }

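    // park here until the main thread signals cleanup so that thread
    // teardown stays outside the measured benchmark window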
    std::unique_lock<std::mutex> l(cleanupMutex);
    cleanupCV.wait(l, [&] { return cleanUpStart; });
  };

  struct rusage rUsageBefore = {};
  struct rusage rUsageAfter = {};
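  // thread creation runs under BENCHMARK_SUSPEND so setup cost is not
  // counted towards the measured benchmark time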
  BENCHMARK_SUSPEND {
    getrusage(RUSAGE_SELF, &rUsageBefore);
    for (size_t i = 0; i < FLAGS_num_threads; i++) {
      threads.push_back(std::thread{runInThread, i});
    }
  }

  {
    // wait for benchmark to finish.
    std::unique_lock<std::mutex> l(benchFinishMutex);
    benchFinishCV.wait(l, [&] { return benchFinished; });
  }

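  // joins, stats aggregation and the rusage delta are likewise excluded
  // from the measured time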
  BENCHMARK_SUSPEND {
    {
      std::unique_lock<std::mutex> l(cleanupMutex);
      cleanUpStart = true;
    }

    cleanupCV.notify_all();
    for (auto& thread : threads) {
      thread.join();
    }

    getrusage(RUSAGE_SELF, &rUsageAfter);
    for (auto& stat : stats) {
      global += stat;
    }
    global.numVCsw += rUsageAfter.ru_nvcsw - rUsageBefore.ru_nvcsw;
    global.numInvCsw += rUsageAfter.ru_nivcsw - rUsageBefore.ru_nivcsw;
  }
}
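
For context, here is a minimal sketch of how a function like this is typically wired into folly's benchmark harness. The benchmark name, the global Stats accumulator, and the reporting comment below are illustrative assumptions, not code from MutexBench.cpp:

#include <folly/Benchmark.h>
#include <folly/init/Init.h>

// hypothetical global accumulator; MutexBench.cpp has its own Stats plumbing,
// this only sketches the wiring
Stats gStats;

// register the benchmark; the BENCHMARK_SUSPEND sections inside runBench()
// keep setup and teardown out of the reported timings
BENCHMARK(MutexLruBench) { runBench(gStats); }

int main(int argc, char** argv) {
  folly::Init init(&argc, &argv); // also parses gflags such as --num_threads
  folly::runBenchmarks();
  // gStats now holds the aggregated op counts and context-switch deltas
  return 0;
}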