in tensorpipe/channel/cuda_ipc/context_impl.cc [197:276]
std::shared_ptr<ContextImpl> ContextImpl::create() {
Error error;
CudaLib cudaLib;
std::tie(error, cudaLib) = CudaLib::create();
if (error) {
TP_VLOG(5)
<< "CUDA IPC channel is not viable because libcuda could not be loaded: "
<< error.what();
return nullptr;
}
NvmlLib nvmlLib;
std::tie(error, nvmlLib) = NvmlLib::create();
if (error) {
TP_VLOG(5)
<< "CUDA IPC channel is not viable because libnvidia-ml could not be loaded: "
<< error.what();
return nullptr;
}
const std::string bootId = generateBootId();
const pid_t pid = ::getpid();
std::unordered_map<Device, std::string> deviceDescriptors;
for (const auto& device : getCudaDevices(cudaLib)) {
// This part is largely inspired from
// https://github.com/NVIDIA/cuda-samples/blob/master/Samples/simpleIPC/simpleIPC.cu.
cudaDeviceProp props;
TP_CUDA_CHECK(cudaGetDeviceProperties(&props, device.index));
// Unified addressing is required for IPC.
if (!props.unifiedAddressing) {
TP_VLOG(4) << "CUDA IPC channel is not viable because CUDA device "
<< device.index << " does not have unified addressing";
return nullptr;
}
// The other two compute modes are "exclusive" and "prohibited", both of
// which prevent access from an other process.
if (props.computeMode != cudaComputeModeDefault) {
TP_VLOG(4) << "CUDA IPC channel is not viable because CUDA device "
<< device.index << " is not in default compute mode";
return nullptr;
}
NopHolder<DeviceDescriptor> nopHolder;
DeviceDescriptor& deviceDescriptor = nopHolder.getObject();
deviceDescriptor.bootId = bootId;
deviceDescriptor.pid = static_cast<int64_t>(pid);
deviceDescriptor.deviceUuid = getUuidOfDevice(cudaLib, device.index);
deviceDescriptors[device] = saveDescriptor(nopHolder);
}
std::vector<std::string> globalUuids;
std::vector<std::vector<bool>> p2pSupport;
std::tie(globalUuids, p2pSupport) = getGlobalUuidsAndP2pSupport(nvmlLib);
TP_VLOG(4) << "The UUIDs of all the GPUs found by the CUDA IPC channel are "
<< joinStrs(globalUuids);
TP_VLOG(4) << "The peer-to-peer support found by the CUDA IPC channel is "
<< formatMatrix(p2pSupport);
std::ostringstream oss;
optional<std::string> nsId = getLinuxNamespaceId(LinuxNamespace::kPid);
if (!nsId.has_value()) {
TP_VLOG(4)
<< "CUDA IPC channel is not viable because it couldn't determine the PID namespace ID";
return nullptr;
}
oss << nsId.value() << "_" << pid;
std::string processIdentifier = oss.str();
return std::make_shared<ContextImpl>(
std::move(deviceDescriptors),
std::move(cudaLib),
std::move(nvmlLib),
std::move(globalUuids),
std::move(p2pSupport),
std::move(processIdentifier));
}