func getGpuMetricsInfo()

in pkg/gpu/nvidia/metrics/metrics.go [182:206]


// getGpuMetricsInfo gathers a metrics snapshot for a single GPU: its UUID,
// model name, used and total memory, and duty cycle.
//
// device is the caller's name for the GPU; it is only used to label the error
// returned for a nil handle. d is the NVML device handle that is queried.
//
// On any failure the zero metricsInfo is returned together with an error
// naming the query that failed.
func getGpuMetricsInfo(device string, d *nvml.Device) (metricsInfo, error) {
	// Reject a nil handle up front so a bad caller gets an error instead of
	// risking a nil-pointer dereference on the first NVML call.
	if d == nil {
		return metricsInfo{}, fmt.Errorf("failed to get GPU metrics: nil device handle for %q", device)
	}

	uuid, ret := d.GetUUID()
	if ret != nvml.SUCCESS {
		return metricsInfo{}, fmt.Errorf("failed to get GPU UUID: %v", nvml.ErrorString(ret))
	}

	deviceModel, ret := d.GetName()
	if ret != nvml.SUCCESS {
		return metricsInfo{}, fmt.Errorf("failed to get GPU device model: %v", nvml.ErrorString(ret))
	}

	mem, ret := d.GetMemoryInfo()
	if ret != nvml.SUCCESS {
		return metricsInfo{}, fmt.Errorf("failed to get GPU memory: %v", nvml.ErrorString(ret))
	}

	// NOTE(review): the 10s argument is presumably the sampling window used
	// for the duty cycle — confirm against collectDutyCycle.
	dutyCycle, err := gmc.collectDutyCycle(uuid, 10*time.Second)
	if err != nil {
		// %w keeps the underlying error reachable via errors.Is / errors.As.
		return metricsInfo{}, fmt.Errorf("failed to get dutyCycle: %w", err)
	}

	return metricsInfo{
		dutyCycle:   dutyCycle,
		usedMemory:  mem.Used,
		totalMemory: mem.Total,
		uuid:        uuid,
		deviceModel: deviceModel,
	}, nil
}