void runOneEpoch()

in benchmark/benchmark.cc [144:161]


  void runOneEpoch() {
    c10::cuda::CUDAStream stream =
        c10::cuda::getStreamFromPool(/*isHighPriority=*/true, /*device=*/0);
    c10::cuda::CUDAStreamGuard g(stream);

    std::vector<c10::intrusive_ptr<c10::ivalue::Future>> futures;
    for (size_t bucketIdx = 0; bucketIdx < numBuckets_; bucketIdx += 1) {
      std::vector<at::Tensor> data = {buckets_[bucketIdx]};
      futures.push_back(pg_->allreduce(data)->getFuture());
    }

    for (const auto& future : futures) {
      // future->wait(std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::minutes(1)));
      future->wait();
    }

    stream.synchronize();
  }