func()

in pkg/controllers/workgenerator/controller.go [92:273]


func (r *Reconciler) Reconcile(ctx context.Context, req controllerruntime.Request) (controllerruntime.Result, error) {
	klog.V(2).InfoS("Start to reconcile a ClusterResourceBinding", "resourceBinding", req.Name)
	startTime := time.Now()
	bindingRef := klog.KRef(req.Namespace, req.Name)
	// add latency log
	defer func() {
		klog.V(2).InfoS("ClusterResourceBinding reconciliation loop ends", "resourceBinding", bindingRef, "latency", time.Since(startTime).Milliseconds())
	}()
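	// Fetch the ClusterResourceBinding; a NotFound error means it has already been deleted and there is nothing to do.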
	var resourceBinding fleetv1beta1.ClusterResourceBinding
	if err := r.Client.Get(ctx, req.NamespacedName, &resourceBinding); err != nil {
		if apierrors.IsNotFound(err) {
			return controllerruntime.Result{}, nil
		}
		klog.ErrorS(err, "Failed to get the resource binding", "resourceBinding", bindingRef)
		return controllerruntime.Result{}, controller.NewAPIServerError(true, err)
	}

	// handle the case where the binding is being deleted
	if resourceBinding.DeletionTimestamp != nil {
		return r.handleDelete(ctx, resourceBinding.DeepCopy())
	}

	// we only care about the bound bindings. We treat unscheduled bindings as bound until they are deleted.
	if resourceBinding.Spec.State != fleetv1beta1.BindingStateBound && resourceBinding.Spec.State != fleetv1beta1.BindingStateUnscheduled {
		klog.V(2).InfoS("Skip reconciling clusterResourceBinding that is not bound", "state", resourceBinding.Spec.State, "resourceBinding", bindingRef)
		return controllerruntime.Result{}, nil
	}

	// Get the member cluster before adding the finalizer if no finalizer is present yet.
	// If the member cluster is not found and the finalizer is not present, we skip the reconciliation and no work will be created.
	// In that case there is no need to add the finalizer, since there are no works to clean up.
	cluster := clusterv1beta1.MemberCluster{}
	if err := r.Client.Get(ctx, types.NamespacedName{Name: resourceBinding.Spec.TargetCluster}, &cluster); err != nil {
		if apierrors.IsNotFound(err) {
			klog.V(2).InfoS("Skip reconciling clusterResourceBinding when the cluster is deleted", "memberCluster", resourceBinding.Spec.TargetCluster, "clusterResourceBinding", bindingRef)
			return controllerruntime.Result{}, nil
		}
		klog.ErrorS(err, "Failed to get the memberCluster", "memberCluster", resourceBinding.Spec.TargetCluster, "clusterResourceBinding", bindingRef)
		return controllerruntime.Result{}, controller.NewAPIServerError(true, err)
	}

	// make sure that the resource binding obj has a finalizer
	if err := r.ensureFinalizer(ctx, &resourceBinding); err != nil {
		return controllerruntime.Result{}, err
	}

	// When the binding is in the unscheduled state, the rollout controller won't update the condition anymore.
	// We treat the unscheduled binding as bound until the rollout controller deletes it, and this controller still
	// updates the status for troubleshooting purposes.
	// Requeue until the rollout controller finishes processing the binding.
	if resourceBinding.Spec.State == fleetv1beta1.BindingStateBound {
		rolloutStartedCondition := resourceBinding.GetCondition(string(fleetv1beta1.ResourceBindingRolloutStarted))
		// Even though the bound binding may not be pointing at the latest resourceSnapshot, we still need to reconcile the works.
		if !condition.IsConditionStatusFalse(rolloutStartedCondition, resourceBinding.Generation) &&
			!condition.IsConditionStatusTrue(rolloutStartedCondition, resourceBinding.Generation) {
			// The rollout controller is still in the process of updating the condition.
			//
			// Note that running this branch would also skip the refreshing of apply strategies;
			// it will resume once the rollout controller updates the rollout started condition.
			klog.V(2).InfoS("Requeue the resource binding until the rollout controller finishes updating the status", "resourceBinding", bindingRef, "generation", resourceBinding.Generation, "rolloutStartedCondition", rolloutStartedCondition)
			return controllerruntime.Result{Requeue: true}, nil
		}
	}

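	// Track whether the override rules were applied successfully and whether any Work object was created or
	// updated during this sync; both flags drive the conditions set below.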
	workUpdated := false
	overrideSucceeded := false
	// list all the corresponding works
	works, syncErr := r.listAllWorksAssociated(ctx, &resourceBinding)
	if syncErr == nil {
		// generate and apply the updated works if we have all the existing works
		overrideSucceeded, workUpdated, syncErr = r.syncAllWork(ctx, &resourceBinding, works, &cluster)
	}
	// Reset the conditions and failed/drifted/diffed placements.
	for i := condition.OverriddenCondition; i < condition.TotalCondition; i++ {
		resourceBinding.RemoveCondition(string(i.ResourceBindingConditionType()))
	}
	resourceBinding.Status.FailedPlacements = nil
	resourceBinding.Status.DriftedPlacements = nil
	resourceBinding.Status.DiffedPlacements = nil
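	// When the overrides succeeded, set the Overridden condition to True, noting whether any override rules were actually configured.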
	if overrideSucceeded {
		overrideReason := condition.OverriddenSucceededReason
		overrideMessage := "Successfully applied the override rules on the resources"
		if len(resourceBinding.Spec.ClusterResourceOverrideSnapshots) == 0 &&
			len(resourceBinding.Spec.ResourceOverrideSnapshots) == 0 {
			overrideReason = condition.OverrideNotSpecifiedReason
			overrideMessage = "No override rules are configured for the selected resources"
		}
		resourceBinding.SetConditions(metav1.Condition{
			Status:             metav1.ConditionTrue,
			Type:               string(fleetv1beta1.ResourceBindingOverridden),
			Reason:             overrideReason,
			Message:            overrideMessage,
			ObservedGeneration: resourceBinding.Generation,
		})
	}

	if syncErr != nil {
		klog.ErrorS(syncErr, "Failed to sync all the works", "resourceBinding", bindingRef)
		//TODO: check if it's user error and set a different failed reason
		errorMessage := syncErr.Error()
		// Unwrap returns nil if syncErr is not wrapped.
		// The wrapped error string format is "%w: %s", so we strip the wrapped error and the ": " separator from the message.
		if err := errors.Unwrap(syncErr); err != nil && len(err.Error()) > 2 {
			errorMessage = errorMessage[len(err.Error())+2:]
		}
		if !overrideSucceeded {
			resourceBinding.SetConditions(metav1.Condition{
				Status:             metav1.ConditionFalse,
				Type:               string(fleetv1beta1.ResourceBindingOverridden),
				Reason:             condition.OverriddenFailedReason,
				Message:            fmt.Sprintf("Failed to apply the override rules on the resources: %s", errorMessage),
				ObservedGeneration: resourceBinding.Generation,
			})
		} else {
			resourceBinding.SetConditions(metav1.Condition{
				Status:             metav1.ConditionFalse,
				Type:               string(fleetv1beta1.ResourceBindingWorkSynchronized),
				Reason:             condition.SyncWorkFailedReason,
				Message:            fmt.Sprintf("Failed to synchronize the work to the latest: %s", errorMessage),
				ObservedGeneration: resourceBinding.Generation,
			})
		}
	} else {
		resourceBinding.SetConditions(metav1.Condition{
			Status:             metav1.ConditionTrue,
			Type:               string(fleetv1beta1.ResourceBindingWorkSynchronized),
			Reason:             condition.AllWorkSyncedReason,
			ObservedGeneration: resourceBinding.Generation,
			Message:            "All of the works are synchronized to the latest",
		})
		switch {
		case !workUpdated:
			// The Work object itself is unchanged; refresh the cluster resource binding status
			// based on the status information reported on the Work object(s).
			setBindingStatus(works, &resourceBinding)
		case resourceBinding.Spec.ApplyStrategy == nil || resourceBinding.Spec.ApplyStrategy.Type != fleetv1beta1.ApplyStrategyTypeReportDiff:
			// The Work object itself has changed; set a False Applied condition which signals
			// that resources are in the process of being applied.
			resourceBinding.SetConditions(metav1.Condition{
				Status:             metav1.ConditionFalse,
				Type:               string(fleetv1beta1.ResourceBindingApplied),
				Reason:             condition.WorkApplyInProcess,
				Message:            "Resources are being applied",
				ObservedGeneration: resourceBinding.Generation,
			})
		case resourceBinding.Spec.ApplyStrategy.Type == fleetv1beta1.ApplyStrategyTypeReportDiff:
			// The Work object itself has changed; set a False DiffReported condition which signals
			// that diff reporting on resources is in progress.
			resourceBinding.SetConditions(metav1.Condition{
				Status:             metav1.ConditionFalse,
				Type:               string(fleetv1beta1.ResourceBindingDiffReported),
				Reason:             condition.WorkDiffReportInProcess,
				Message:            "Diff reporting on resources is in progress",
				ObservedGeneration: resourceBinding.Generation,
			})
		}
	}

	// update the resource binding status
	if updateErr := r.updateBindingStatusWithRetry(ctx, &resourceBinding); updateErr != nil {
		return controllerruntime.Result{}, updateErr
	}
	if errors.Is(syncErr, controller.ErrUserError) {
		// Stop retrying when the error is caused by a user error.
		// For example, the user provided an invalid override, or the resources cannot be extracted from the config map.
		klog.ErrorS(syncErr, "Stopped retrying the resource binding", "resourceBinding", bindingRef)
		return controllerruntime.Result{}, nil
	}

	if errors.Is(syncErr, errResourceSnapshotNotFound) {
		// This error usually indicates that the resource snapshot has been deleted, since the rollout controller that fills in
		// the resource snapshot shares the same informer cache as this controller. In that case there is no point in retrying,
		// because the resource snapshot will not come back; we will get another event if the binding is pointed at a new resource.
		// However, this error can also happen during integration tests when the resource snapshot exists but the client that
		// creates it is not the same as the controller client, so we do need to retry in that case.
		// It can also happen if the user runs a customized rollout controller that does not share the informer cache with this controller.
		return controllerruntime.Result{Requeue: true}, nil
	}
	// requeue (by returning the error) if we failed to sync the works
	// If we updated the works, their status will change and the change will be detected by the watch event.
	return controllerruntime.Result{}, syncErr
}
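
The message trimming above relies on sync errors being wrapped with fmt.Errorf("%w: %s", ...). Below is a minimal, standalone sketch of that pattern, not the controller's actual code: the errUserError sentinel and the message text are hypothetical stand-ins for controller.ErrUserError and a real sync failure.

package main

import (
	"errors"
	"fmt"
)

// errUserError is a hypothetical sentinel, standing in for controller.ErrUserError.
var errUserError = errors.New("user error")

// trimmedMessage strips the "<wrapped error>: " prefix produced by fmt.Errorf("%w: %s", ...),
// mirroring how the reconciler derives the condition message from syncErr.
func trimmedMessage(syncErr error) string {
	msg := syncErr.Error()
	// Unwrap returns nil when syncErr is not wrapped; otherwise drop the wrapped
	// error's text plus the ": " separator so that only the detail message remains.
	if inner := errors.Unwrap(syncErr); inner != nil && len(msg) > len(inner.Error())+2 {
		msg = msg[len(inner.Error())+2:]
	}
	return msg
}

func main() {
	syncErr := fmt.Errorf("%w: %s", errUserError, "invalid override rule for the selected resources")
	fmt.Println(trimmedMessage(syncErr)) // prints: invalid override rule for the selected resources
}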