func discoverDevices()

in components/otelopscol/receiver/nvmlreceiver/client.go [160:209]


// discoverDevices enumerates the NVIDIA devices reported by NVML and returns
// three parallel slices: device handles, model names and UUIDs. Index k of
// each slice always refers to the same device.
//
// A device whose handle, UUID or name cannot be queried is logged at warning
// level and omitted from all three slices. The MIG mode of each discovered
// device is then inspected purely for diagnostics; its outcome never affects
// which devices are returned.
//
// An error is returned only when the device count cannot be obtained or when
// no device at all could be discovered; in both cases the slices are nil.
//
// NOTE(review): presumably NVML has been initialized by the caller before
// this function runs — confirm against the call site.
func discoverDevices(logger *zap.Logger) ([]nvml.Device, []string, []string, error) {
	count, ret := nvml.DeviceGetCount()
	if ret != nvml.SUCCESS {
		return nil, nil, nil, fmt.Errorf("Unable to get Nvidia device count on '%v'", nvml.ErrorString(ret))
	}

	devices := make([]nvml.Device, 0, count)
	names := make([]string, 0, count)
	UUIDs := make([]string, 0, count)
	for i := 0; i < count; i++ {
		device, ret := nvml.DeviceGetHandleByIndex(i)
		if ret != nvml.SUCCESS {
			logger.Sugar().Warnf("Unable to get Nvidia device at index %d on '%v'; ignoring device.", i, nvml.ErrorString(ret))
			continue
		}

		/* Note: UUID and Name query should not fail under normal circumstances */
		UUID, ret := device.GetUUID()
		if ret != nvml.SUCCESS {
			logger.Sugar().Warnf("Unable to get UUID of Nvidia device %d on '%v'; ignoring device.", i, nvml.ErrorString(ret))
			continue
		}

		name, ret := device.GetName()
		if ret != nvml.SUCCESS {
			logger.Sugar().Warnf("Unable to get name of Nvidia device %d on '%v'; ignoring device.", i, nvml.ErrorString(ret))
			continue
		}

		// All three slices are appended together so they stay index-aligned.
		devices = append(devices, device)
		UUIDs = append(UUIDs, UUID)
		names = append(names, name)
		logger.Sugar().Infof("Discovered Nvidia device %d of model %s with UUID %s.", i, name, UUID)

		// The MIG check is advisory only: the device has already been
		// recorded above, so none of the outcomes below drops it.
		currMode, _, ret := device.GetMigMode()
		switch {
		case ret == nvml.ERROR_NOT_SUPPORTED:
			// NVML reports NOT_SUPPORTED for GPUs without MIG capability.
			// That is the expected answer on such hardware, so it is not
			// worth a warning.
			logger.Sugar().Debugf("Nvidia device %d does not support MIG.", i)
		case ret != nvml.SUCCESS:
			// Include the NVML error, like the other failure warnings do,
			// so the cause can be diagnosed from the log alone.
			logger.Sugar().Warnf("Unable to query MIG mode for Nvidia device %d on '%v'.", i, nvml.ErrorString(ret))
		case currMode == nvml.DEVICE_MIG_ENABLE:
			logger.Sugar().Warnf("Nvidia device %d has MIG enabled. GPU utilization queries may not be supported.", i)
		}
	}

	if len(devices) == 0 {
		return nil, nil, nil, fmt.Errorf("No supported NVIDIA devices found")
	}

	return devices, names, UUIDs, nil
}