in pkg/gpu/nvidia/manager.go [235:267]
// discoverGPUs enumerates the NVIDIA devices reported by NVML and registers
// each of them with the manager as healthy.
//
// If nvmlutil.NvmlDeviceInfo has not been set, it is initialized to a default
// &nvmlutil.DeviceInfo{} first. For every device index the function looks up
// the device handle and its minor number, derives the device name
// "nvidia<minor>", and calls SetDeviceHealth with pluginapi.Healthy and the
// topology information obtained for the device.
//
// An error is returned as soon as the device count, a device handle, or a
// minor number cannot be obtained. A failed topology lookup is only logged:
// the device is still registered, with whatever topology value the lookup
// returned.
func (ngm *nvidiaGPUManager) discoverGPUs() error {
	if nvmlutil.NvmlDeviceInfo == nil {
		nvmlutil.NvmlDeviceInfo = &nvmlutil.DeviceInfo{}
	}

	devicesCount, ret := nvmlutil.NvmlDeviceInfo.DeviceCount()
	if ret != nvml.SUCCESS {
		return fmt.Errorf("failed to get devices count: %v", nvml.ErrorString(ret))
	}

	for i := 0; i < devicesCount; i++ {
		device, ret := nvmlutil.NvmlDeviceInfo.DeviceHandleByIndex(i)
		if ret != nvml.SUCCESS {
			return fmt.Errorf("failed to get the device handle for index %d: %v", i, nvml.ErrorString(ret))
		}

		minor, ret := nvmlutil.NvmlDeviceInfo.MinorNumber(device)
		if ret != nvml.SUCCESS {
			return fmt.Errorf("failed to get the minor number for device with index %d: %v", i, nvml.ErrorString(ret))
		}

		path := fmt.Sprintf("nvidia%d", minor)
		glog.V(3).Infof("Found Nvidia GPU %q\n", path)

		// Topology is best-effort: log the failure (including the error
		// itself) and still register the device below.
		topologyInfo, err := nvmlutil.Topology(device, pciDevicesRoot)
		if err != nil {
			glog.Errorf("unable to get topology for device with index %d: %v", i, err)
		}
		ngm.SetDeviceHealth(path, pluginapi.Healthy, topologyInfo)
	}
	return nil
}