func()

in controllers/solrcloud_controller.go [90:542]


// Reconcile performs one reconciliation pass for the SolrCloud named by req
// and returns the requeue decision for the controller.
//
// In order, the pass:
//  1. loads the SolrCloud (a missing object is not an error) and persists any
//     defaults, requeueing immediately when defaults were written;
//  2. reconciles ZooKeeper, the common Service, the per-node Services, the
//     headless Service, the solr.xml/log4j2.xml ConfigMap, the security
//     config, the TLS config and the Ingress;
//  3. creates or updates the StatefulSet, unless that is blocked because the
//     ZooKeeper connection string has no host or a node Service has no IP yet;
//  4. continues the cluster operation whose lock is held on the StatefulSet,
//     or tries to acquire a lock for a rolling update or scale operation;
//  5. upserts or deletes the PodDisruptionBudget and patches the SolrCloud
//     status when it changed.
//
// The first error encountered aborts the pass and is returned together with
// whatever requeue delay has been accumulated so far.
func (r *SolrCloudReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
	logger := log.FromContext(ctx)

	instance := &solrv1beta1.SolrCloud{}
	err := r.Get(ctx, req.NamespacedName, instance)
	if err != nil {
		if errors.IsNotFound(err) {
			// Object not found, return.  Created objects are automatically garbage collected.
			// For additional cleanup logic use finalizers.
			return reconcile.Result{}, nil
		}
		// Error reading the object - requeue the req.
		return reconcile.Result{}, err
	}

	// Persist defaults first and start over, so the rest of the pass always works on a fully defaulted spec.
	changed := instance.WithDefaults(logger)
	if changed {
		logger.Info("Setting default settings for SolrCloud")
		if err = r.Update(ctx, instance); err != nil {
			return reconcile.Result{}, err
		}
		return reconcile.Result{Requeue: true}, nil
	}

	// When working with the clouds, some actions outside of kube may need to be retried after a few seconds
	requeueOrNot := reconcile.Result{}

	// The status is rebuilt from scratch on every pass and only patched at the very end if it differs.
	newStatus := solrv1beta1.SolrCloudStatus{}

	blockReconciliationOfStatefulSet := false
	if err = r.reconcileZk(ctx, logger, instance, &newStatus); err != nil {
		return requeueOrNot, err
	}

	// Generate Common Service
	commonService := util.GenerateCommonService(instance)

	// Check if the Common Service already exists
	commonServiceLogger := logger.WithValues("service", commonService.Name)
	foundCommonService := &corev1.Service{}
	err = r.Get(ctx, types.NamespacedName{Name: commonService.Name, Namespace: commonService.Namespace}, foundCommonService)
	if err != nil && errors.IsNotFound(err) {
		commonServiceLogger.Info("Creating Common Service")
		if err = controllerutil.SetControllerReference(instance, commonService, r.Scheme); err == nil {
			err = r.Create(ctx, commonService)
		}
	} else if err == nil {
		var needsUpdate bool
		needsUpdate, err = util.OvertakeControllerRef(instance, foundCommonService, r.Scheme)
		needsUpdate = util.CopyServiceFields(commonService, foundCommonService, commonServiceLogger) || needsUpdate

		// Update the found Service and write the result back if there are any changes
		if needsUpdate && err == nil {
			commonServiceLogger.Info("Updating Common Service")
			err = r.Update(ctx, foundCommonService)
		}
	}
	if err != nil {
		return requeueOrNot, err
	}

	solrNodeNames := instance.GetAllSolrPodNames()

	hostNameIpMap := make(map[string]string)
	// Generate a service for every Node
	if instance.UsesIndividualNodeServices() {
		for _, nodeName := range solrNodeNames {
			err, ip := r.reconcileNodeService(ctx, logger, instance, nodeName)
			if err != nil {
				return requeueOrNot, err
			}
			// This IP Address only needs to be used in the hostname map if the SolrCloud is advertising the external address.
			if instance.Spec.SolrAddressability.External.UseExternalAddress {
				if ip == "" {
					// If we are using this IP in the hostAliases of the statefulSet, it needs to be set for every service before trying to update the statefulSet
					blockReconciliationOfStatefulSet = true
				} else {
					hostNameIpMap[instance.AdvertisedNodeHost(nodeName)] = ip
				}
			}
		}
	}

	// Generate HeadlessService
	if instance.UsesHeadlessService() {
		headless := util.GenerateHeadlessService(instance)

		// Check if the HeadlessService already exists
		headlessServiceLogger := logger.WithValues("service", headless.Name)
		foundHeadless := &corev1.Service{}
		err = r.Get(ctx, types.NamespacedName{Name: headless.Name, Namespace: headless.Namespace}, foundHeadless)
		if err != nil && errors.IsNotFound(err) {
			headlessServiceLogger.Info("Creating Headless Service")
			if err = controllerutil.SetControllerReference(instance, headless, r.Scheme); err == nil {
				err = r.Create(ctx, headless)
			}
		} else if err == nil {
			var needsUpdate bool
			needsUpdate, err = util.OvertakeControllerRef(instance, foundHeadless, r.Scheme)
			needsUpdate = util.CopyServiceFields(headless, foundHeadless, headlessServiceLogger) || needsUpdate

			// Update the found HeadlessService and write the result back if there are any changes
			if needsUpdate && err == nil {
				headlessServiceLogger.Info("Updating Headless Service")
				err = r.Update(ctx, foundHeadless)
			}
		}
		if err != nil {
			return requeueOrNot, err
		}
	}

	// Use a map to hold additional config info that gets determined during reconcile
	// needed for creating the STS and supporting objects (secrets, config maps, and so on)
	reconcileConfigInfo := make(map[string]string)

	// Generate ConfigMap unless the user supplied a custom ConfigMap for solr.xml
	if instance.Spec.CustomSolrKubeOptions.ConfigMapOptions != nil && instance.Spec.CustomSolrKubeOptions.ConfigMapOptions.ProvidedConfigMap != "" {
		providedConfigMapName := instance.Spec.CustomSolrKubeOptions.ConfigMapOptions.ProvidedConfigMap
		foundConfigMap := &corev1.ConfigMap{}
		nn := types.NamespacedName{Name: providedConfigMapName, Namespace: instance.Namespace}
		err = r.Get(ctx, nn, foundConfigMap)
		if err != nil {
			return requeueOrNot, err // if they passed a providedConfigMap name, then it must exist
		}

		if foundConfigMap.Data != nil {
			logXml, hasLogXml := foundConfigMap.Data[util.LogXmlFile]
			solrXml, hasSolrXml := foundConfigMap.Data[util.SolrXmlFile]

			// if there's a user-provided config, it must have one of the expected keys
			if !hasLogXml && !hasSolrXml {
				// TODO: Create event for the CRD.
				return requeueOrNot, fmt.Errorf("user provided ConfigMap %s must have one of 'solr.xml' and/or 'log4j2.xml'",
					providedConfigMapName)
			}

			if hasSolrXml {
				// make sure the user-provided solr.xml is valid
				if !strings.Contains(solrXml, "${hostPort:") {
					return requeueOrNot,
						fmt.Errorf("custom solr.xml in ConfigMap %s must contain a placeholder for the 'hostPort' variable, such as <int name=\"hostPort\">${hostPort:80}</int>",
							providedConfigMapName)
				}
				// stored in the pod spec annotations on the statefulset so that we get a restart when solr.xml changes
				reconcileConfigInfo[util.SolrXmlMd5Annotation] = fmt.Sprintf("%x", md5.Sum([]byte(solrXml)))
				reconcileConfigInfo[util.SolrXmlFile] = foundConfigMap.Name
			}

			if hasLogXml {
				if !strings.Contains(logXml, "monitorInterval=") {
					// stored in the pod spec annotations on the statefulset so that we get a restart when the log config changes
					reconcileConfigInfo[util.LogXmlMd5Annotation] = fmt.Sprintf("%x", md5.Sum([]byte(logXml)))
				} // else log4j will automatically refresh for us, so no restart needed
				reconcileConfigInfo[util.LogXmlFile] = foundConfigMap.Name
			}

		} else {
			return requeueOrNot, fmt.Errorf("provided ConfigMap %s has no data", providedConfigMapName)
		}
	}

	if reconcileConfigInfo[util.SolrXmlFile] == "" {
		// no user provided solr.xml, so create the default
		configMap := util.GenerateConfigMap(instance)

		reconcileConfigInfo[util.SolrXmlMd5Annotation] = fmt.Sprintf("%x", md5.Sum([]byte(configMap.Data[util.SolrXmlFile])))
		reconcileConfigInfo[util.SolrXmlFile] = configMap.Name

		// Check if the ConfigMap already exists
		configMapLogger := logger.WithValues("configMap", configMap.Name)
		foundConfigMap := &corev1.ConfigMap{}
		err = r.Get(ctx, types.NamespacedName{Name: configMap.Name, Namespace: configMap.Namespace}, foundConfigMap)
		if err != nil && errors.IsNotFound(err) {
			configMapLogger.Info("Creating ConfigMap")
			if err = controllerutil.SetControllerReference(instance, configMap, r.Scheme); err == nil {
				err = r.Create(ctx, configMap)
			}
		} else if err == nil {
			var needsUpdate bool
			needsUpdate, err = util.OvertakeControllerRef(instance, foundConfigMap, r.Scheme)
			needsUpdate = util.CopyConfigMapFields(configMap, foundConfigMap, configMapLogger) || needsUpdate

			// Update the found ConfigMap and write the result back if there are any changes
			if needsUpdate && err == nil {
				configMapLogger.Info("Updating ConfigMap")
				err = r.Update(ctx, foundConfigMap)
			}
		}
		if err != nil {
			return requeueOrNot, err
		}
	}

	// Holds security config info needed during construction of the StatefulSet
	var security *util.SecurityConfig
	if instance.Spec.SolrSecurity != nil {
		security, err = util.ReconcileSecurityConfig(ctx, &r.Client, instance)
		if err == nil && security != nil {
			// If authn enabled on Solr, we need to pass the auth header when making requests
			ctx, err = security.AddAuthToContext(ctx)
			if err != nil {
				logger.Error(err, "failed to create Authorization header when reconciling")
			}
		}
		if err != nil {
			return requeueOrNot, err
		}
	}

	// Only create stateful set if zkConnectionString can be found (must contain a host before the chroot)
	zkConnectionString := newStatus.ZkConnectionString()
	if len(zkConnectionString) < 2 || strings.HasPrefix(zkConnectionString, "/") {
		blockReconciliationOfStatefulSet = true
		logger.Info("Will not create/update the StatefulSet because the zookeeperConnectionString has no host", "zookeeperConnectionString", zkConnectionString)
	}

	// Holds TLS config info for a server cert and optionally a client cert as well
	var tls *util.TLSCerts

	// can't have a solrClientTLS w/o solrTLS!
	if instance.Spec.SolrTLS == nil && instance.Spec.SolrClientTLS != nil {
		return requeueOrNot, fmt.Errorf("invalid TLS config, `spec.solrTLS` is not defined; `spec.solrClientTLS` can only be used in addition to `spec.solrTLS`")
	}

	// don't start reconciling TLS until we have ZK connectivity, avoids TLS code having to check for ZK
	if !blockReconciliationOfStatefulSet && instance.Spec.SolrTLS != nil {
		tls, err = r.reconcileTLSConfig(instance)
		if err != nil {
			return requeueOrNot, err
		}
	}

	extAddressabilityOpts := instance.Spec.SolrAddressability.External
	if extAddressabilityOpts != nil && extAddressabilityOpts.Method == solrv1beta1.Ingress {
		// Generate Ingress
		ingress := util.GenerateIngress(instance, solrNodeNames)

		// Check if the Ingress already exists
		ingressLogger := logger.WithValues("ingress", ingress.Name)
		foundIngress := &netv1.Ingress{}
		err = r.Get(ctx, types.NamespacedName{Name: ingress.Name, Namespace: ingress.Namespace}, foundIngress)
		if err != nil && errors.IsNotFound(err) {
			ingressLogger.Info("Creating Ingress")
			if err = controllerutil.SetControllerReference(instance, ingress, r.Scheme); err == nil {
				err = r.Create(ctx, ingress)
			}
		} else if err == nil {
			var needsUpdate bool
			needsUpdate, err = util.OvertakeControllerRef(instance, foundIngress, r.Scheme)
			needsUpdate = util.CopyIngressFields(ingress, foundIngress, ingressLogger) || needsUpdate

			// Update the found Ingress and write the result back if there are any changes
			if needsUpdate && err == nil {
				ingressLogger.Info("Updating Ingress")
				err = r.Update(ctx, foundIngress)
			}
		}
		if err != nil {
			return requeueOrNot, err
		}
	}

	var statefulSet *appsv1.StatefulSet

	if !blockReconciliationOfStatefulSet {
		// Generate StatefulSet that should exist
		expectedStatefulSet := util.GenerateStatefulSet(instance, &newStatus, hostNameIpMap, reconcileConfigInfo, tls, security)

		// Check if the StatefulSet already exists
		statefulSetLogger := logger.WithValues("statefulSet", expectedStatefulSet.Name)
		foundStatefulSet := &appsv1.StatefulSet{}
		err = r.Get(ctx, types.NamespacedName{Name: expectedStatefulSet.Name, Namespace: expectedStatefulSet.Namespace}, foundStatefulSet)

		// The result of the lookup above (err) is deliberately left untouched until the create/update
		// decision below; the scheduling step uses its own error variable.
		// TODO: Move this logic down to the cluster ops and save the existing annotation in util.MaintainPreservedStatefulSetFields()
		// Set the annotation for a scheduled restart, if necessary.
		if nextRestartAnnotation, reconcileWaitDuration, schedulingErr := util.ScheduleNextRestart(instance.Spec.UpdateStrategy.RestartSchedule, foundStatefulSet.Spec.Template.Annotations); schedulingErr != nil {
			logger.Error(schedulingErr, "Cannot parse restartSchedule cron", "cron", instance.Spec.UpdateStrategy.RestartSchedule)
		} else {
			if nextRestartAnnotation != "" {
				// Set the new restart time annotation
				expectedStatefulSet.Spec.Template.Annotations[util.SolrScheduledRestartAnnotation] = nextRestartAnnotation
				// TODO: Create event for the CRD.
			} else if existingRestartAnnotation, exists := foundStatefulSet.Spec.Template.Annotations[util.SolrScheduledRestartAnnotation]; exists {
				// Keep the existing nextRestart annotation if it exists and we aren't setting a new one.
				expectedStatefulSet.Spec.Template.Annotations[util.SolrScheduledRestartAnnotation] = existingRestartAnnotation
			}
			if reconcileWaitDuration != nil {
				// Set the requeueAfter if it has not been set, or is greater than the time we need to wait to restart again
				updateRequeueAfter(&requeueOrNot, *reconcileWaitDuration)
			}
		}

		// Update or Create the StatefulSet
		if err != nil && errors.IsNotFound(err) {
			statefulSetLogger.Info("Creating StatefulSet")
			if err = controllerutil.SetControllerReference(instance, expectedStatefulSet, r.Scheme); err == nil {
				err = r.Create(ctx, expectedStatefulSet)
			}
			statefulSet = expectedStatefulSet
		} else if err == nil {
			util.MaintainPreservedStatefulSetFields(expectedStatefulSet, foundStatefulSet)

			// Check to see if the StatefulSet needs an update
			var needsUpdate bool
			needsUpdate, err = util.OvertakeControllerRef(instance, foundStatefulSet, r.Scheme)
			needsUpdate = util.CopyStatefulSetFields(expectedStatefulSet, foundStatefulSet, statefulSetLogger) || needsUpdate

			// Update the found StatefulSet and write the result back if there are any changes
			if needsUpdate && err == nil {
				statefulSetLogger.Info("Updating StatefulSet")
				err = r.Update(ctx, foundStatefulSet)
			}
			statefulSet = foundStatefulSet
		}
		if err != nil {
			return requeueOrNot, err
		}
	} else {
		// If we are blocking the reconciliation of the statefulSet, we still want to find information about it.
		// The client decodes into the object it is handed, so it must be allocated before the lookup;
		// passing the nil pointer declared above can never yield the existing StatefulSet.
		statefulSet = &appsv1.StatefulSet{}
		err = r.Get(ctx, types.NamespacedName{Name: instance.StatefulSetName(), Namespace: instance.Namespace}, statefulSet)
		if err != nil {
			if !errors.IsNotFound(err) {
				return requeueOrNot, err
			}
			// Nothing exists yet; signal that to the check below.
			statefulSet = nil
		}
	}

	// *********************************************************
	// The operations after this require a statefulSet to exist,
	// including updating the solrCloud status
	// *********************************************************
	if statefulSet == nil {
		// Only reachable from the blocked branch above, so err is the NotFound error of that lookup.
		return requeueOrNot, err
	}

	// Do not reconcile the storage finalizer unless we have PVC Labels that we know the Solr data PVCs are using.
	// Otherwise it will delete all PVCs possibly
	if len(statefulSet.Spec.Selector.MatchLabels) > 0 {
		if err := r.reconcileStorageFinalizer(ctx, instance, statefulSet.Spec.Selector.MatchLabels, logger); err != nil {
			// Best effort: log, retry later, and carry on with the rest of the pass.
			logger.Error(err, "Cannot delete PVCs while garbage collecting after deletion.")
			updateRequeueAfter(&requeueOrNot, time.Second*15)
		}
	}

	// Get the SolrCloud's Pods and initialize them if necessary
	var podList []corev1.Pod
	var podSelector labels.Selector
	if podSelector, podList, err = r.initializePods(ctx, instance, logger); err != nil {
		return requeueOrNot, err
	}

	// Make sure the SolrCloud status is up-to-date with the state of the cluster
	var outOfDatePods util.OutOfDatePodSegmentation
	var availableUpdatedPodCount int
	outOfDatePods, availableUpdatedPodCount, err = createCloudStatus(instance, &newStatus, statefulSet.Status, podSelector, podList)
	if err != nil {
		return requeueOrNot, err
	}

	// We only want to do one cluster operation at a time, so we use a lock to ensure that.
	// Update or Scale, one-at-a-time. We do not want to do both.
	hasReadyPod := newStatus.ReadyReplicas > 0
	var retryLaterDuration time.Duration
	if clusterOpLock, hasAnn := statefulSet.Annotations[util.ClusterOpsLockAnnotation]; hasAnn {
		clusterOpMetadata := statefulSet.Annotations[util.ClusterOpsMetadataAnnotation]
		switch clusterOpLock {
		case util.UpdateLock:
			retryLaterDuration, err = handleManagedCloudRollingUpdate(ctx, r, instance, statefulSet, outOfDatePods, hasReadyPod, availableUpdatedPodCount, logger)
		case util.ScaleDownLock:
			retryLaterDuration, err = handleManagedCloudScaleDown(ctx, r, instance, statefulSet, clusterOpMetadata, podList, logger)
		case util.ScaleUpLock:
			retryLaterDuration, err = handleManagedCloudScaleUp(ctx, r, instance, statefulSet, clusterOpMetadata, logger)
		default:
			// This shouldn't happen, but we don't want to be stuck if it does.
			// Just remove the cluster Op, because the solr operator version running does not support it.
			err = clearClusterOp(ctx, r, statefulSet, "clusterOp not supported", logger)
		}
	} else {
		lockAcquired := false
		// Start cluster operations if needed.
		// The operations will be actually run in future reconcile loops, but a clusterOpLock will be acquired here.
		// And that lock will tell future reconcile loops that the operation needs to be done.
		// If a non-managed scale needs to take place, this method will update the StatefulSet without starting
		// a "locked" cluster operation
		lockAcquired, retryLaterDuration, err = determineRollingUpdateClusterOpLockIfNecessary(ctx, r, instance, statefulSet, outOfDatePods, logger)
		// Only look for a scale operation when the rolling update check did not acquire the lock.
		// Note that this overwrites retryLaterDuration and err from the rolling update check.
		if !lockAcquired {
			lockAcquired, retryLaterDuration, err = determineScaleClusterOpLockIfNecessary(ctx, r, instance, statefulSet, podList, logger)
		}
		// After a lock is acquired, the reconcile will be started again because the StatefulSet is being watched
	}
	// A failed cluster operation without an explicit delay is retried after a short default.
	if err != nil && retryLaterDuration == 0 {
		retryLaterDuration = time.Second * 5
	}
	if retryLaterDuration > 0 {
		updateRequeueAfter(&requeueOrNot, retryLaterDuration)
	}
	if err != nil {
		return requeueOrNot, err
	}

	// Upsert or delete solrcloud-wide PodDisruptionBudget(s) based on 'Enabled' flag.
	pdb := util.GeneratePodDisruptionBudget(instance, statefulSet.Spec.Selector.MatchLabels)
	if instance.Spec.Availability.PodDisruptionBudget.Enabled != nil && *instance.Spec.Availability.PodDisruptionBudget.Enabled {
		// Check if the PodDisruptionBudget already exists
		pdbLogger := logger.WithValues("podDisruptionBudget", pdb.Name)
		foundPDB := &policyv1.PodDisruptionBudget{}
		err = r.Get(ctx, types.NamespacedName{Name: pdb.Name, Namespace: pdb.Namespace}, foundPDB)
		if err != nil && errors.IsNotFound(err) {
			pdbLogger.Info("Creating PodDisruptionBudget")
			if err = controllerutil.SetControllerReference(instance, pdb, r.Scheme); err == nil {
				err = r.Create(ctx, pdb)
			}
		} else if err == nil {
			var needsUpdate bool
			needsUpdate, err = util.OvertakeControllerRef(instance, foundPDB, r.Scheme)
			needsUpdate = util.CopyPodDisruptionBudgetFields(pdb, foundPDB, pdbLogger) || needsUpdate

			// Update the found PodDisruptionBudget and write the result back if there are any changes
			if needsUpdate && err == nil {
				pdbLogger.Info("Updating PodDisruptionBudget")
				err = r.Update(ctx, foundPDB)
			}
		}
		if err != nil {
			return requeueOrNot, err
		}
	} else { // PDB is disabled, make sure that we delete any previously created pdb that might exist.
		// Keep the delete error local: a tolerated NotFound must not leak into the function's final result.
		if pdbErr := r.Client.Delete(ctx, pdb); pdbErr != nil && !errors.IsNotFound(pdbErr) {
			return requeueOrNot, pdbErr
		}
	}

	if !reflect.DeepEqual(instance.Status, newStatus) {
		logger.Info("Updating SolrCloud Status", "status", newStatus)
		oldInstance := instance.DeepCopy()
		instance.Status = newStatus
		err = r.Status().Patch(ctx, instance, client.MergeFrom(oldInstance))
		if err != nil {
			return requeueOrNot, err
		}
	}

	// Every failure path has returned above, so the pass succeeded.
	return requeueOrNot, nil
}