void CudaAllreduceHalvingDoubling::initReductionsAndBroadcasts()

in gloo/cuda_allreduce_halving_doubling.cc [605:643]


void CudaAllreduceHalvingDoubling<T, W>::initReductionsAndBroadcasts(
    typename std::enable_if<
        std::is_same<U, CudaDeviceWorkspace<T>>::value,
        typename U::Pointer>::type*) {
  // Populates reduceBeforeFirstSend_, reduceBeforeFirstRecv_ and
  // broadcastOps_. The unnamed pointer argument is never read; it only lets
  // std::enable_if select this overload when U is CudaDeviceWorkspace<T>.

  // With zero steps inside the block, none of the members are touched.
  if (stepsWithinBlock_ == 0) {
    return;
  }

  // Each reduction is created only when its list of device pointers is
  // non-empty; otherwise the corresponding member is left as it was.
  const bool hasFirstSendPtrs = !devicePtrsForFirstSend_.empty();
  if (hasFirstSendPtrs) {
    reduceBeforeFirstSend_ = cudaDeviceReduce(
        streams_, devicePtrsForFirstSend_, scratchPtrForFirstSend_, fn_,
        0, sendCounts_[0]);
  }

  const bool hasFirstRecvPtrs = !devicePtrsForFirstRecv_.empty();
  if (hasFirstRecvPtrs) {
    reduceBeforeFirstRecv_ = cudaDeviceReduce(
        streams_, devicePtrsForFirstRecv_, scratchPtrForFirstRecv_, fn_,
        0, recvCounts_[0]);
  }

  // Exactly one entry is appended to broadcastOps_ per step: a null
  // placeholder when that step has no broadcast pointers, a broadcast op
  // otherwise.
  const auto finalStep = stepsWithinBlock_ - 1;
  for (int step = 0; step < stepsWithinBlock_; ++step) {
    if (devicePtrsForBroadcast_[step].empty()) {
      broadcastOps_.push_back(nullptr);
    } else {
      // The final step broadcasts the send and receive counts combined;
      // every earlier step broadcasts only its send count.
      const auto count = (step == finalStep)
          ? sendCounts_[step] + recvCounts_[step]
          : sendCounts_[step];
      broadcastOps_.push_back(cudaDeviceBroadcast(
          streams_,
          devicePtrsForBroadcast_[step],
          scratchPtrForBroadcast_[step],
          0,
          count));
    }
  }
}