inline std::vector<NcclComm> createManyNcclComms()

in fairring/utils.h [143:176]


/// Initializes one NCCL communicator per entry of `devices` and returns them
/// wrapped as `NcclComm` objects, in the same order as `devices`.
///
/// All `ncclCommInitRank` calls are issued inside a single
/// `ncclGroupStart()` / `ncclGroupEnd()` pair, and each call is made while a
/// `CUDAGuard` for the corresponding device is alive.
///
/// @param rankStart  Rank assigned to `devices[0]`; `devices[i]` is
///                   initialized with rank `rankStart + i`.
/// @param devices    Devices to create communicators for.
/// @param worldSize  Rank count passed to every `ncclCommInitRank` call.
/// @param uniqueId   NCCL unique ID passed to every `ncclCommInitRank` call.
/// @return One `NcclComm` per device, each built from the raw handle and a
///         `NcclCommDeleter`.
///
/// Every NCCL call is wrapped in `NCCL_CHECK`; how a failure is reported
/// depends on that macro's definition, which is not shown here.
inline std::vector<NcclComm> createManyNcclComms(
    int rankStart,
    const std::vector<c10::Device>& devices,
    int worldSize,
    ncclUniqueId uniqueId) {
  const std::size_t numDevices = devices.size();

  // Raw handles, one slot per device, filled in by ncclCommInitRank below.
  std::vector<ncclComm_t> handles(numDevices);

  NCCL_CHECK(ncclGroupStart());
  for (std::size_t idx = 0; idx < numDevices; ++idx) {
    // Keep the guard for this device alive for the duration of the init call.
    c10::cuda::CUDAGuard deviceGuard(devices[idx]);
    ncclComm_t* const slot = &handles[idx];
    const int rank = static_cast<int>(rankStart + idx);
    NCCL_CHECK(ncclCommInitRank(slot, worldSize, uniqueId, rank));
  }
  NCCL_CHECK(ncclGroupEnd());

  // Hand each raw handle over to an NcclComm paired with NcclCommDeleter.
  std::vector<NcclComm> wrapped;
  wrapped.reserve(numDevices);
  for (ncclComm_t handle : handles) {
    wrapped.push_back(NcclComm(handle, NcclCommDeleter{}));
  }
  return wrapped;
}