std::shared_ptr<ContextImpl> ContextImpl::create()

in tensorpipe/channel/cuda_ipc/context_impl.cc [197:276]


std::shared_ptr<ContextImpl> ContextImpl::create() {
  Error error;
  CudaLib cudaLib;
  std::tie(error, cudaLib) = CudaLib::create();
  if (error) {
    TP_VLOG(5)
        << "CUDA IPC channel is not viable because libcuda could not be loaded: "
        << error.what();
    return nullptr;
  }

  NvmlLib nvmlLib;
  std::tie(error, nvmlLib) = NvmlLib::create();
  if (error) {
    TP_VLOG(5)
        << "CUDA IPC channel is not viable because libnvidia-ml could not be loaded: "
        << error.what();
    return nullptr;
  }

  const std::string bootId = generateBootId();
  const pid_t pid = ::getpid();

  std::unordered_map<Device, std::string> deviceDescriptors;
  for (const auto& device : getCudaDevices(cudaLib)) {
    // This part is largely inspired from
    // https://github.com/NVIDIA/cuda-samples/blob/master/Samples/simpleIPC/simpleIPC.cu.
    cudaDeviceProp props;
    TP_CUDA_CHECK(cudaGetDeviceProperties(&props, device.index));

    // Unified addressing is required for IPC.
    if (!props.unifiedAddressing) {
      TP_VLOG(4) << "CUDA IPC channel is not viable because CUDA device "
                 << device.index << " does not have unified addressing";
      return nullptr;
    }

    // The other two compute modes are "exclusive" and "prohibited", both of
    // which prevent access from an other process.
    if (props.computeMode != cudaComputeModeDefault) {
      TP_VLOG(4) << "CUDA IPC channel is not viable because CUDA device "
                 << device.index << " is not in default compute mode";
      return nullptr;
    }

    NopHolder<DeviceDescriptor> nopHolder;
    DeviceDescriptor& deviceDescriptor = nopHolder.getObject();
    deviceDescriptor.bootId = bootId;
    deviceDescriptor.pid = static_cast<int64_t>(pid);
    deviceDescriptor.deviceUuid = getUuidOfDevice(cudaLib, device.index);

    deviceDescriptors[device] = saveDescriptor(nopHolder);
  }

  std::vector<std::string> globalUuids;
  std::vector<std::vector<bool>> p2pSupport;
  std::tie(globalUuids, p2pSupport) = getGlobalUuidsAndP2pSupport(nvmlLib);
  TP_VLOG(4) << "The UUIDs of all the GPUs found by the CUDA IPC channel are "
             << joinStrs(globalUuids);
  TP_VLOG(4) << "The peer-to-peer support found by the CUDA IPC channel is "
             << formatMatrix(p2pSupport);

  std::ostringstream oss;
  optional<std::string> nsId = getLinuxNamespaceId(LinuxNamespace::kPid);
  if (!nsId.has_value()) {
    TP_VLOG(4)
        << "CUDA IPC channel is not viable because it couldn't determine the PID namespace ID";
    return nullptr;
  }
  oss << nsId.value() << "_" << pid;
  std::string processIdentifier = oss.str();

  return std::make_shared<ContextImpl>(
      std::move(deviceDescriptors),
      std::move(cudaLib),
      std::move(nvmlLib),
      std::move(globalUuids),
      std::move(p2pSupport),
      std::move(processIdentifier));
}