in pkg/gpu/nvidia/metrics/metrics.go [182:206]
// getGpuMetricsInfo builds a metricsInfo snapshot for the GPU device d.
//
// It queries the device, in order, for its UUID, its name (reported as the
// device model) and its memory info, and then asks gmc (defined outside this
// function) for the duty cycle of that UUID. The first step that fails aborts
// the collection: a zero metricsInfo is returned together with an error that
// names the failing step.
//
// The device argument is not referenced by this function body.
func getGpuMetricsInfo(device string, d *nvml.Device) (metricsInfo, error) {
	uuid, ret := d.GetUUID()
	if ret != nvml.SUCCESS {
		return metricsInfo{}, fmt.Errorf("failed to get GPU UUID: %v", nvml.ErrorString(ret))
	}

	deviceModel, ret := d.GetName()
	if ret != nvml.SUCCESS {
		return metricsInfo{}, fmt.Errorf("failed to get GPU device model: %v", nvml.ErrorString(ret))
	}

	mem, ret := d.GetMemoryInfo()
	if ret != nvml.SUCCESS {
		return metricsInfo{}, fmt.Errorf("failed to get GPU memory: %v", nvml.ErrorString(ret))
	}

	// NOTE(review): the 10s duration is presumably the window over which the
	// duty cycle is sampled/averaged; confirm against collectDutyCycle.
	dutyCycle, err := gmc.collectDutyCycle(uuid, 10*time.Second)
	if err != nil {
		// Wrap with %w (message text unchanged) so callers can still inspect
		// the underlying cause with errors.Is / errors.As.
		return metricsInfo{}, fmt.Errorf("failed to get dutyCycle: %w", err)
	}

	return metricsInfo{
		dutyCycle:   dutyCycle,
		usedMemory:  mem.Used,
		totalMemory: mem.Total,
		uuid:        uuid,
		deviceModel: deviceModel,
	}, nil
}