func()

in pkg/controllers/member/internalserviceexport/controller.go [207:263]


// observeMetrics records a service export duration data point for the given InternalServiceExport.
//
// The duration is the time elapsed, in milliseconds, between the exportedSince timestamp kept in the object's
// service reference and startTime. At most one data point is collected per resource version of the referenced
// Service: the resource version is saved as an annotation on the InternalServiceExport (via the hub client)
// before the data point is emitted, and calls that find a matching annotation return without doing anything.
//
// It returns an error only when updating the InternalServiceExport fails.
func (r *Reconciler) observeMetrics(ctx context.Context,
	internalSvcExport *fleetnetv1alpha1.InternalServiceExport,
	startTime time.Time) error {
	// No-op reconciliations (e.g. resyncs, untracked changes) would otherwise emit duplicate data points; compare
	// the resource version in the service reference against the one stored by the previous observation.
	currentRV := internalSvcExport.Spec.ServiceReference.ResourceVersion
	observedRV, found := internalSvcExport.Annotations[metrics.MetricsAnnotationLastObservedResourceVersion]
	if found && observedRV == currentRV {
		// Already observed for this resource version.
		return nil
	}

	// Persist the resource version first. If the update fails the function returns before any data point is
	// emitted, so a later retry cannot lead to a duplicate observation.
	if internalSvcExport.Annotations == nil {
		// Writing to a nil map panics; allocate one on demand.
		internalSvcExport.Annotations = map[string]string{}
	}
	internalSvcExport.Annotations[metrics.MetricsAnnotationLastObservedResourceVersion] = currentRV
	if err := r.HubClient.Update(ctx, internalSvcExport); err != nil {
		return err
	}

	// Read the service reference only after the update so that every value used below comes from the object as
	// it stands once the update call has returned.
	svcRef := internalSvcExport.Spec.ServiceReference

	// Without an exportedSince timestamp there is nothing to measure against. This is not expected to happen in
	// practice, as the Fleet networking controllers always assign a timestamp to each exported object.
	if svcRef.ExportedSince.IsZero() {
		klog.V(4).InfoS("exportedSince timestamp is absent; service export duration data point is not collected",
			"internalServiceExport", klog.KObj(internalSvcExport))
		return nil
	}

	durationMs := startTime.Sub(svcRef.ExportedSince.Time).Milliseconds()

	// A zero or negative duration makes no sense (it can show up, for example, when a user manipulates the
	// timestamps); substitute a constant of one second so that such outliers do not distort data analysis.
	if durationMs <= 0 {
		klog.V(4).InfoS("A negative service export duration data point has been observed",
			"serviceNamespacedName", svcRef.NamespacedName,
			"originClusterID", svcRef.ClusterID)
		durationMs = time.Second.Milliseconds()
	}

	// Cap the data point as well, so that very large outliers do not skew the stats (e.g. averages).
	upperBound := int64(metrics.ExportDurationRightBound)
	if durationMs > upperBound {
		durationMs = upperBound
	}

	svcExportDuration.WithLabelValues(r.MemberClusterID).Observe(float64(durationMs))
	// TO-DO (chenyu1): Remove the metric logs when histogram metrics are supported in the backend.
	klog.V(2).InfoS("serviceExportDurationMilliseconds",
		"value", durationMs,
		"originClusterID", r.MemberClusterID)
	return nil
}