uint64_t calculateInnerAsync()

in benchmarks/nested_for_benchmark.cpp [209:229]


// Sums calculate(input, i, foo) over the indices i in [0, numElements), cutting
// the index range into chunks and handing each chunk to std::async.
//
// input, foo:  forwarded unchanged to every calculate() call.
// numElements: number of indices to evaluate.
// Returns the total of all per-chunk partial sums. The call blocks until every
// future has produced its result (each one is consumed with get()).
uint64_t calculateInnerAsync(uint64_t input, size_t foo, int numElements) {
  // Ceiling division, so that g_numThreads chunks cover all numElements indices.
  size_t chunkSize = (numElements + g_numThreads - 1) / g_numThreads;

  std::vector<std::future<uint64_t>> futures;

  // NOTE(review): chunk starts run up to kWorkMultiplier * numElements, while
  // each chunk's end is clamped to numElements. If kWorkMultiplier > 1, chunks
  // starting at or past numElements are empty; confirm whether the bound or the
  // clamp is the intended one.
  for (int i = 0; i < kWorkMultiplier * numElements; i += chunkSize) {
    futures.push_back(
        std::async([input, foo, i, end = std::min<int>(numElements, i + chunkSize)]() mutable {
          uint64_t lsum = 0;
          // Use `<` rather than `!=`: a chunk may start beyond `end` (see the
          // note above), and `!=` would then never terminate normally, running
          // until `i` overflows (undefined behaviour for a signed int).
          for (; i < end; ++i) {
            lsum += calculate(input, i, foo);
          }
          return lsum;
        }));
  }

  // Collect the partial sums; get() waits for the corresponding task to finish.
  uint64_t sum = 0;
  for (auto& s : futures) {
    sum += s.get();
  }
  return sum;
}