in pkg/gpu/nvidia/manager.go [413:430]
// totalMemPerGPU returns the Total field of the memory info that NVML reports
// for the device at index 0.
//
// An error is returned when the device count cannot be obtained, when the
// count is not positive, or when either the device handle or its memory info
// cannot be queried.
//
// NOTE(review): only device 0 is inspected; presumably every GPU on the node
// is expected to have the same amount of memory — confirm against callers.
func totalMemPerGPU() (uint64, error) {
	numDevices, rc := nvml.DeviceGetCount()
	switch {
	case rc != nvml.SUCCESS:
		return 0, fmt.Errorf("failed to enumerate devices: %v", nvml.ErrorString(rc))
	case numDevices <= 0:
		return 0, fmt.Errorf("no GPUs on node, count: %d", numDevices)
	}

	// Only the first device is queried.
	firstGPU, rc := nvml.DeviceGetHandleByIndex(0)
	if rc != nvml.SUCCESS {
		return 0, fmt.Errorf("failed to query GPU with nvml: %v", nvml.ErrorString(rc))
	}

	memInfo, rc := firstGPU.GetMemoryInfo()
	if rc != nvml.SUCCESS {
		return 0, fmt.Errorf("failed to get GPU memory: %v", nvml.ErrorString(rc))
	}

	return memInfo.Total, nil
}