in source/neuropod/internal/cuda_device_mapping.cc [141:214]
// Builds a mapping from CUDA device index to the UUID string NVML reports for that device.
//
// For every index in [0, cudaGetDeviceCount), the device's PCI bus id is fetched from CUDA,
// used to look up an NVML device handle, and the handle's UUID is stored under the index.
//
// Returns an empty map when:
//   - CUDA or NVML could not be loaded,
//   - CUDA reports an error or no devices,
//   - any per-device CUDA or NVML query fails.
std::unordered_map<int, std::string> get_id_mapping()
{
    // Make sure our logging is initialized
    init_logging();

    if (!load_cuda() || !load_nvml())
    {
        // Couldn't load CUDA or NVML so we can't do anything else
        return {};
    }

    // Get device count
    // Based on https://github.com/pytorch/pytorch/blob/master/c10/cuda/CUDAFunctions.h#L19
    int device_count = 0;
    int err          = cudaGetDeviceCount(&device_count);

    // Check if CUDA gave us an error
    if (err != 0 /* cudaSuccess */)
    {
        // Clear out the error state, so we don't spuriously trigger someone else.
        cudaGetLastError();
        SPDLOG_DEBUG("Error when getting number of GPU devices");
        return {};
    }

    // Check if we have a GPU
    if (device_count <= 0)
    {
        SPDLOG_DEBUG("No GPUs available");
        return {};
    }

    std::unordered_map<int, std::string> id_mapping;
    for (int i = 0; i < device_count; i++)
    {
        // Get the PCI bus id from the device ID
        // At most 13 chars according to
        // https://docs.nvidia.com/cuda/archive/9.0/cuda-runtime-api/group__CUDART__DEVICE.html#group__CUDART__DEVICE_1gea264dad3d8c4898e0b82213c0253def
        // Zero-initialized so the buffer is always a valid C string.
        char pciBusId[13] = {};
        err               = cudaDeviceGetPCIBusId(pciBusId, sizeof(pciBusId), i);
        if (err != 0 /* cudaSuccess */)
        {
            // Clear out the error state, so we don't spuriously trigger someone else.
            cudaGetLastError();
            SPDLOG_ERROR("Error when getting pciBusId for GPU {}", i);
            return {};
        }

        // Get an NVML device handle
        nvmlDevice_t device;
        err = nvmlDeviceGetHandleByPciBusId(pciBusId, &device);
        if (err != 0 /* NVML_SUCCESS */)
        {
            SPDLOG_ERROR("NVML error when getting device from pciBusId: {}", nvmlErrorString(err));

            // `device` was never written, so it must not be passed to any further NVML call.
            // Bail out the same way the CUDA error paths above do.
            return {};
        }

        // Get a UUID from the handle
        // At most 80 chars according to
        // https://docs.nvidia.com/deploy/nvml-api/group__nvmlDeviceQueries.html#group__nvmlDeviceQueries_1g84dca2d06974131ccec1651428596191
        // Zero-initialized so the buffer is always a valid C string.
        char uuid[80] = {};
        err           = nvmlDeviceGetUUID(device, uuid, sizeof(uuid));
        if (err != 0 /* NVML_SUCCESS */)
        {
            SPDLOG_ERROR("NVML error when getting uuid from device: {}", nvmlErrorString(err));

            // No UUID was produced; don't record a bogus entry for this device.
            return {};
        }

        SPDLOG_INFO("Found GPU {} with UUID {}", i, uuid);
        id_mapping.emplace(i, uuid);
    }

    return id_mapping;
}