in pkg/controllers/member/internalserviceexport/controller.go [207:263]
// observeMetrics records a service export duration data point (in milliseconds) for the given
// InternalServiceExport, at most once per resource version recorded in its service reference.
//
// The duration is measured from the exportedSince timestamp in the service reference to startTime.
// Before anything is observed, the object is annotated with the resource version being observed and
// written back through the hub client; an error from that update is returned unchanged and no data
// point is recorded.
func (r *Reconciler) observeMetrics(ctx context.Context,
	internalSvcExport *fleetnetv1alpha1.InternalServiceExport,
	startTime time.Time) error {
	annotationKey := metrics.MetricsAnnotationLastObservedResourceVersion
	curVersion := internalSvcExport.Spec.ServiceReference.ResourceVersion

	// No-op reconciliations (e.g. resyncs, untracked changes) would otherwise yield duplicate data points for
	// the same resource version; bail out if this version has already been accounted for.
	if prevVersion, found := internalSvcExport.Annotations[annotationKey]; found && prevVersion == curVersion {
		return nil
	}

	// Persist the marker first, and only then observe: the annotation has to land before the data point does.
	if internalSvcExport.Annotations == nil {
		// Writing to a nil map panics, so build the map with the marker already in it.
		internalSvcExport.Annotations = map[string]string{annotationKey: curVersion}
	} else {
		internalSvcExport.Annotations[annotationKey] = curVersion
	}
	if err := r.HubClient.Update(ctx, internalSvcExport); err != nil {
		return err
	}

	// Snapshot the service reference only after the update call has returned, so that the values read below
	// are those held by the object at this point.
	svcRef := internalSvcExport.Spec.ServiceReference

	// Without an exportedSince timestamp there is nothing to measure against. Per the original authors this
	// should rarely (if ever) happen, as a timestamp is expected to be assigned to every exported object.
	if svcRef.ExportedSince.IsZero() {
		klog.V(4).InfoS("exportedSince timestamp is absent; service export duration data point is not collected",
			"internalServiceExport", klog.KObj(internalSvcExport))
		return nil
	}

	elapsedMs := startTime.Sub(svcRef.ExportedSince.Time).Milliseconds()

	// A zero or negative duration (exportedSince not earlier than startTime, e.g. because a user manipulated
	// the timestamp) is not meaningful; substitute a constant of exactly 1 second so that such outliers do not
	// distort data analysis.
	if elapsedMs <= 0 {
		klog.V(4).InfoS("A negative service export duration data point has been observed",
			"serviceNamespacedName", svcRef.NamespacedName,
			"originClusterID", svcRef.ClusterID)
		elapsedMs = time.Second.Milliseconds()
	}

	// Likewise, cap the data point at a fixed upper bound to keep large outliers from skewing the stats
	// (e.g. averages).
	if upperBound := int64(metrics.ExportDurationRightBound); elapsedMs > upperBound {
		elapsedMs = upperBound
	}

	svcExportDuration.WithLabelValues(r.MemberClusterID).Observe(float64(elapsedMs))
	// TO-DO (chenyu1): Remove the metric logs when histogram metrics are supported in the backend.
	klog.V(2).InfoS("serviceExportDurationMilliseconds",
		"value", elapsedMs,
		"originClusterID", r.MemberClusterID)
	return nil
}