func newClient()

in components/otelopscol/receiver/dcgmreceiver/client.go [73:121]


// newClient initializes DCGM through initializeDcgm and returns a dcgmClient
// configured to watch the requested fields that are reported as supported.
//
// If DCGM initialization fails, the returned error joins
// ErrDcgmInitialization with the underlying error. A failure to query the
// supported profiling fields is only logged as a warning; field filtering then
// proceeds with whatever getSupportedProfilingFields returned. Requested
// fields that filterSupportedFields reports as unavailable are logged and
// skipped.
//
// When no fields remain enabled, no device group or field watches are created
// and the client is returned with zero-value group handles.
//
// NOTE(review): the error returns that follow a successful initializeDcgm do
// not invoke dcgmCleanup. Confirm whether the cleanup handle should be
// released on those paths.
func newClient(settings *dcgmClientSettings, logger *zap.Logger) (*dcgmClient, error) {
	dcgmCleanup, err := initializeDcgm(settings.endpoint, logger)
	if err != nil {
		return nil, errors.Join(ErrDcgmInitialization, err)
	}
	sugar := logger.Sugar()

	enabledFieldGroup := dcgm.FieldHandle{}
	requestedFieldIDs := toFieldIDs(settings.fields)
	supportedProfilingFieldIDs, err := getSupportedProfilingFields()
	if err != nil {
		// If there is error querying the supported fields at all, let the
		// receiver collect basic metrics: (GPU utilization, used/free memory).
		// The error is formatted with %v: %w is only understood by fmt.Errorf
		// and would be rendered as "%!w(...)" by the logger.
		sugar.Warnf("Error querying supported profiling fields on '%v'. GPU profiling metrics will not be collected.", err)
	}
	enabledFields, unavailableFields := filterSupportedFields(requestedFieldIDs, supportedProfilingFieldIDs)
	for _, f := range unavailableFields {
		sugar.Warnf("Field '%s' is not supported", dcgmIDToName[f])
	}

	var deviceGroup dcgm.GroupHandle
	if len(enabledFields) != 0 {
		supportedDeviceIndices, err := dcgm.GetSupportedDevices()
		if err != nil {
			return nil, fmt.Errorf("Unable to discover supported GPUs on %w", err)
		}
		sugar.Infof("Discovered %d supported GPU devices", len(supportedDeviceIndices))

		deviceGroup, err = createDeviceGroup(logger, supportedDeviceIndices)
		if err != nil {
			return nil, err
		}
		enabledFieldGroup, err = setWatchesOnEnabledFields(settings.pollingInterval, logger, deviceGroup, enabledFields)
		if err != nil {
			// Best-effort teardown of the field group; the watch error is the
			// one reported to the caller, so a destroy failure is ignored.
			_ = dcgm.FieldGroupDestroy(enabledFieldGroup)
			return nil, fmt.Errorf("Unable to set field watches on %w", err)
		}
	}

	return &dcgmClient{
		logger:                         sugar,
		handleCleanup:                  dcgmCleanup,
		enabledFieldIDs:                enabledFields,
		enabledFieldGroup:              enabledFieldGroup,
		deviceGroup:                    deviceGroup,
		devices:                        map[uint]deviceMetrics{},
		lastSuccessfulPoll:             time.Now(),
		deviceMetricToFailedQueryCount: make(map[string]int),
		pollingInterval:                settings.pollingInterval,
		retryBlankValues:               settings.retryBlankValues,
		maxRetries:                     settings.maxRetries,
	}, nil
}