in controllers/solrcloud_controller.go [90:542]
// Reconcile drives the cluster towards the state described by the SolrCloud
// resource named in req.
//
// In order, it: loads the SolrCloud (returning quietly if it no longer exists),
// persists defaults, reconciles Zookeeper, the common/node/headless Services,
// the solr.xml ConfigMap, security and TLS configuration, the Ingress and the
// StatefulSet. Once a StatefulSet is available it reconciles the storage
// finalizer, initializes pods, computes the new status, runs or starts at most
// one cluster operation (rolling update or scale), upserts or deletes the
// PodDisruptionBudget and finally patches the SolrCloud status if it changed.
//
// The returned Result carries any RequeueAfter requested along the way; a
// non-nil error is returned as soon as any step fails.
func (r *SolrCloudReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
	logger := log.FromContext(ctx)

	instance := &solrv1beta1.SolrCloud{}
	err := r.Get(ctx, req.NamespacedName, instance)
	if err != nil {
		if errors.IsNotFound(err) {
			// Object not found, return. Created objects are automatically garbage collected.
			// For additional cleanup logic use finalizers.
			return reconcile.Result{}, nil
		}
		// Error reading the object - requeue the req.
		return reconcile.Result{}, err
	}

	// Persist defaults first and start over, so the rest of the reconcile works on a fully defaulted spec.
	changed := instance.WithDefaults(logger)
	if changed {
		logger.Info("Setting default settings for SolrCloud")
		if err = r.Update(ctx, instance); err != nil {
			return reconcile.Result{}, err
		}
		return reconcile.Result{Requeue: true}, nil
	}

	// When working with the clouds, some actions outside of kube may need to be retried after a few seconds
	requeueOrNot := reconcile.Result{}

	newStatus := solrv1beta1.SolrCloudStatus{}

	blockReconciliationOfStatefulSet := false
	if err = r.reconcileZk(ctx, logger, instance, &newStatus); err != nil {
		return requeueOrNot, err
	}

	// Generate Common Service
	commonService := util.GenerateCommonService(instance)

	// Check if the Common Service already exists
	commonServiceLogger := logger.WithValues("service", commonService.Name)
	foundCommonService := &corev1.Service{}
	err = r.Get(ctx, types.NamespacedName{Name: commonService.Name, Namespace: commonService.Namespace}, foundCommonService)
	if err != nil && errors.IsNotFound(err) {
		commonServiceLogger.Info("Creating Common Service")
		if err = controllerutil.SetControllerReference(instance, commonService, r.Scheme); err == nil {
			err = r.Create(ctx, commonService)
		}
	} else if err == nil {
		var needsUpdate bool
		needsUpdate, err = util.OvertakeControllerRef(instance, foundCommonService, r.Scheme)
		needsUpdate = util.CopyServiceFields(commonService, foundCommonService, commonServiceLogger) || needsUpdate

		// Update the found Service and write the result back if there are any changes
		if needsUpdate && err == nil {
			commonServiceLogger.Info("Updating Common Service")
			err = r.Update(ctx, foundCommonService)
		}
	}
	if err != nil {
		return requeueOrNot, err
	}

	solrNodeNames := instance.GetAllSolrPodNames()

	hostNameIpMap := make(map[string]string)
	// Generate a service for every Node
	if instance.UsesIndividualNodeServices() {
		for _, nodeName := range solrNodeNames {
			err, ip := r.reconcileNodeService(ctx, logger, instance, nodeName)
			if err != nil {
				return requeueOrNot, err
			}
			// This IP Address only needs to be used in the hostname map if the SolrCloud is advertising the external address.
			if instance.Spec.SolrAddressability.External.UseExternalAddress {
				if ip == "" {
					// If we are using this IP in the hostAliases of the statefulSet, it needs to be set for every service before trying to update the statefulSet
					blockReconciliationOfStatefulSet = true
				} else {
					hostNameIpMap[instance.AdvertisedNodeHost(nodeName)] = ip
				}
			}
		}
	}

	// Generate HeadlessService
	if instance.UsesHeadlessService() {
		headless := util.GenerateHeadlessService(instance)

		// Check if the HeadlessService already exists
		headlessServiceLogger := logger.WithValues("service", headless.Name)
		foundHeadless := &corev1.Service{}
		err = r.Get(ctx, types.NamespacedName{Name: headless.Name, Namespace: headless.Namespace}, foundHeadless)
		if err != nil && errors.IsNotFound(err) {
			headlessServiceLogger.Info("Creating Headless Service")
			if err = controllerutil.SetControllerReference(instance, headless, r.Scheme); err == nil {
				err = r.Create(ctx, headless)
			}
		} else if err == nil {
			var needsUpdate bool
			needsUpdate, err = util.OvertakeControllerRef(instance, foundHeadless, r.Scheme)
			needsUpdate = util.CopyServiceFields(headless, foundHeadless, headlessServiceLogger) || needsUpdate

			// Update the found HeadlessService and write the result back if there are any changes
			if needsUpdate && err == nil {
				headlessServiceLogger.Info("Updating Headless Service")
				err = r.Update(ctx, foundHeadless)
			}
		}
		if err != nil {
			return requeueOrNot, err
		}
	}

	// Use a map to hold additional config info that gets determined during reconcile
	// needed for creating the STS and supporting objects (secrets, config maps, and so on)
	reconcileConfigInfo := make(map[string]string)

	// Generate ConfigMap unless the user supplied a custom ConfigMap for solr.xml
	if instance.Spec.CustomSolrKubeOptions.ConfigMapOptions != nil && instance.Spec.CustomSolrKubeOptions.ConfigMapOptions.ProvidedConfigMap != "" {
		providedConfigMapName := instance.Spec.CustomSolrKubeOptions.ConfigMapOptions.ProvidedConfigMap
		foundConfigMap := &corev1.ConfigMap{}
		nn := types.NamespacedName{Name: providedConfigMapName, Namespace: instance.Namespace}
		err = r.Get(ctx, nn, foundConfigMap)
		if err != nil {
			return requeueOrNot, err // if they passed a providedConfigMap name, then it must exist
		}

		if foundConfigMap.Data != nil {
			logXml, hasLogXml := foundConfigMap.Data[util.LogXmlFile]
			solrXml, hasSolrXml := foundConfigMap.Data[util.SolrXmlFile]

			// if there's a user-provided config, it must have one of the expected keys
			if !hasLogXml && !hasSolrXml {
				// TODO: Create event for the CRD.
				return requeueOrNot, fmt.Errorf("user provided ConfigMap %s must have one of 'solr.xml' and/or 'log4j2.xml'",
					providedConfigMapName)
			}

			if hasSolrXml {
				// make sure the user-provided solr.xml is valid
				if !strings.Contains(solrXml, "${hostPort:") {
					return requeueOrNot,
						fmt.Errorf("custom solr.xml in ConfigMap %s must contain a placeholder for the 'hostPort' variable, such as <int name=\"hostPort\">${hostPort:80}</int>",
							providedConfigMapName)
				}
				// stored in the pod spec annotations on the statefulset so that we get a restart when solr.xml changes
				reconcileConfigInfo[util.SolrXmlMd5Annotation] = fmt.Sprintf("%x", md5.Sum([]byte(solrXml)))
				reconcileConfigInfo[util.SolrXmlFile] = foundConfigMap.Name
			}

			if hasLogXml {
				if !strings.Contains(logXml, "monitorInterval=") {
					// stored in the pod spec annotations on the statefulset so that we get a restart when the log config changes
					reconcileConfigInfo[util.LogXmlMd5Annotation] = fmt.Sprintf("%x", md5.Sum([]byte(logXml)))
				} // else log4j will automatically refresh for us, so no restart needed
				reconcileConfigInfo[util.LogXmlFile] = foundConfigMap.Name
			}
		} else {
			return requeueOrNot, fmt.Errorf("provided ConfigMap %s has no data", providedConfigMapName)
		}
	}

	if reconcileConfigInfo[util.SolrXmlFile] == "" {
		// no user provided solr.xml, so create the default
		configMap := util.GenerateConfigMap(instance)

		reconcileConfigInfo[util.SolrXmlMd5Annotation] = fmt.Sprintf("%x", md5.Sum([]byte(configMap.Data[util.SolrXmlFile])))
		reconcileConfigInfo[util.SolrXmlFile] = configMap.Name

		// Check if the ConfigMap already exists
		configMapLogger := logger.WithValues("configMap", configMap.Name)
		foundConfigMap := &corev1.ConfigMap{}
		err = r.Get(ctx, types.NamespacedName{Name: configMap.Name, Namespace: configMap.Namespace}, foundConfigMap)
		if err != nil && errors.IsNotFound(err) {
			configMapLogger.Info("Creating ConfigMap")
			if err = controllerutil.SetControllerReference(instance, configMap, r.Scheme); err == nil {
				err = r.Create(ctx, configMap)
			}
		} else if err == nil {
			var needsUpdate bool
			needsUpdate, err = util.OvertakeControllerRef(instance, foundConfigMap, r.Scheme)
			needsUpdate = util.CopyConfigMapFields(configMap, foundConfigMap, configMapLogger) || needsUpdate

			// Update the found ConfigMap and write the result back if there are any changes
			if needsUpdate && err == nil {
				configMapLogger.Info("Updating ConfigMap")
				err = r.Update(ctx, foundConfigMap)
			}
		}
		if err != nil {
			return requeueOrNot, err
		}
	}

	// Holds security config info needed during construction of the StatefulSet
	var security *util.SecurityConfig
	if instance.Spec.SolrSecurity != nil {
		security, err = util.ReconcileSecurityConfig(ctx, &r.Client, instance)
		if err == nil && security != nil {
			// If authn enabled on Solr, we need to pass the auth header when making requests
			ctx, err = security.AddAuthToContext(ctx)
			if err != nil {
				logger.Error(err, "failed to create Authorization header when reconciling")
			}
		}
		if err != nil {
			return requeueOrNot, err
		}
	}

	// Only create stateful set if zkConnectionString can be found (must contain a host before the chroot)
	zkConnectionString := newStatus.ZkConnectionString()
	if len(zkConnectionString) < 2 || strings.HasPrefix(zkConnectionString, "/") {
		blockReconciliationOfStatefulSet = true
		logger.Info("Will not create/update the StatefulSet because the zookeeperConnectionString has no host", "zookeeperConnectionString", zkConnectionString)
	}

	// Holds TLS config info for a server cert and optionally a client cert as well
	var tls *util.TLSCerts

	// can't have a solrClientTLS w/o solrTLS!
	if instance.Spec.SolrTLS == nil && instance.Spec.SolrClientTLS != nil {
		return requeueOrNot, fmt.Errorf("invalid TLS config, `spec.solrTLS` is not defined; `spec.solrClientTLS` can only be used in addition to `spec.solrTLS`")
	}

	// don't start reconciling TLS until we have ZK connectivity, avoids TLS code having to check for ZK
	if !blockReconciliationOfStatefulSet && instance.Spec.SolrTLS != nil {
		tls, err = r.reconcileTLSConfig(instance)
		if err != nil {
			return requeueOrNot, err
		}
	}

	extAddressabilityOpts := instance.Spec.SolrAddressability.External
	if extAddressabilityOpts != nil && extAddressabilityOpts.Method == solrv1beta1.Ingress {
		// Generate Ingress
		ingress := util.GenerateIngress(instance, solrNodeNames)

		// Check if the Ingress already exists
		ingressLogger := logger.WithValues("ingress", ingress.Name)
		foundIngress := &netv1.Ingress{}
		err = r.Get(ctx, types.NamespacedName{Name: ingress.Name, Namespace: ingress.Namespace}, foundIngress)
		if err != nil && errors.IsNotFound(err) {
			ingressLogger.Info("Creating Ingress")
			if err = controllerutil.SetControllerReference(instance, ingress, r.Scheme); err == nil {
				err = r.Create(ctx, ingress)
			}
		} else if err == nil {
			var needsUpdate bool
			needsUpdate, err = util.OvertakeControllerRef(instance, foundIngress, r.Scheme)
			needsUpdate = util.CopyIngressFields(ingress, foundIngress, ingressLogger) || needsUpdate

			// Update the found Ingress and write the result back if there are any changes
			if needsUpdate && err == nil {
				ingressLogger.Info("Updating Ingress")
				err = r.Update(ctx, foundIngress)
			}
		}
		if err != nil {
			return requeueOrNot, err
		}
	}

	var statefulSet *appsv1.StatefulSet

	if !blockReconciliationOfStatefulSet {
		// Generate StatefulSet that should exist
		expectedStatefulSet := util.GenerateStatefulSet(instance, &newStatus, hostNameIpMap, reconcileConfigInfo, tls, security)

		// Check if the StatefulSet already exists
		statefulSetLogger := logger.WithValues("statefulSet", expectedStatefulSet.Name)
		foundStatefulSet := &appsv1.StatefulSet{}
		err = r.Get(ctx, types.NamespacedName{Name: expectedStatefulSet.Name, Namespace: expectedStatefulSet.Namespace}, foundStatefulSet)

		// TODO: Move this logic down to the cluster ops and save the existing annotation in util.MaintainPreservedStatefulSetFields()
		// Set the annotation for a scheduled restart, if necessary.
		// This runs before err from the Get above is inspected; reading annotations from an unpopulated
		// foundStatefulSet only reads a nil map.
		if nextRestartAnnotation, reconcileWaitDuration, schedulingErr := util.ScheduleNextRestart(instance.Spec.UpdateStrategy.RestartSchedule, foundStatefulSet.Spec.Template.Annotations); schedulingErr != nil {
			logger.Error(schedulingErr, "Cannot parse restartSchedule cron", "cron", instance.Spec.UpdateStrategy.RestartSchedule)
		} else {
			if nextRestartAnnotation != "" {
				// Set the new restart time annotation
				expectedStatefulSet.Spec.Template.Annotations[util.SolrScheduledRestartAnnotation] = nextRestartAnnotation
				// TODO: Create event for the CRD.
			} else if existingRestartAnnotation, exists := foundStatefulSet.Spec.Template.Annotations[util.SolrScheduledRestartAnnotation]; exists {
				// Keep the existing nextRestart annotation if it exists and we aren't setting a new one.
				expectedStatefulSet.Spec.Template.Annotations[util.SolrScheduledRestartAnnotation] = existingRestartAnnotation
			}
			if reconcileWaitDuration != nil {
				// Set the requeueAfter if it has not been set, or is greater than the time we need to wait to restart again
				updateRequeueAfter(&requeueOrNot, *reconcileWaitDuration)
			}
		}

		// Update or Create the StatefulSet
		if err != nil && errors.IsNotFound(err) {
			statefulSetLogger.Info("Creating StatefulSet")
			if err = controllerutil.SetControllerReference(instance, expectedStatefulSet, r.Scheme); err == nil {
				err = r.Create(ctx, expectedStatefulSet)
			}
			statefulSet = expectedStatefulSet
		} else if err == nil {
			util.MaintainPreservedStatefulSetFields(expectedStatefulSet, foundStatefulSet)

			// Check to see if the StatefulSet needs an update
			var needsUpdate bool
			needsUpdate, err = util.OvertakeControllerRef(instance, foundStatefulSet, r.Scheme)
			needsUpdate = util.CopyStatefulSetFields(expectedStatefulSet, foundStatefulSet, statefulSetLogger) || needsUpdate

			// Update the found StatefulSet and write the result back if there are any changes
			if needsUpdate && err == nil {
				statefulSetLogger.Info("Updating StatefulSet")
				err = r.Update(ctx, foundStatefulSet)
			}
			statefulSet = foundStatefulSet
		}
		if err != nil {
			return requeueOrNot, err
		}
	} else {
		// If we are blocking the reconciliation of the statefulSet, we still want to find information about it.
		// Get needs a non-nil object to fill in, so look the StatefulSet up into a fresh value and only
		// publish it through statefulSet when the lookup succeeded.
		existingStatefulSet := &appsv1.StatefulSet{}
		err = r.Get(ctx, types.NamespacedName{Name: instance.StatefulSetName(), Namespace: instance.Namespace}, existingStatefulSet)
		if err == nil {
			statefulSet = existingStatefulSet
		} else if !errors.IsNotFound(err) {
			return requeueOrNot, err
		}
		// On NotFound, statefulSet stays nil and is handled by the guard below.
	}

	// *********************************************************
	// The operations after this require a statefulSet to exist,
	// including updating the solrCloud status
	// *********************************************************
	if statefulSet == nil {
		// NOTE(review): the only way to get here is the blocked path above with a missing StatefulSet,
		// so err is the NotFound error from that lookup - confirm whether nil should be returned instead.
		return requeueOrNot, err
	}

	// Do not reconcile the storage finalizer unless we have PVC Labels that we know the Solr data PVCs are using.
	// Otherwise it will delete all PVCs possibly
	if len(statefulSet.Spec.Selector.MatchLabels) > 0 {
		if err := r.reconcileStorageFinalizer(ctx, instance, statefulSet.Spec.Selector.MatchLabels, logger); err != nil {
			// Best effort: log and retry later instead of failing the whole reconcile.
			logger.Error(err, "Cannot delete PVCs while garbage collecting after deletion.")
			updateRequeueAfter(&requeueOrNot, time.Second*15)
		}
	}

	// Get the SolrCloud's Pods and initialize them if necessary
	var podList []corev1.Pod
	var podSelector labels.Selector
	if podSelector, podList, err = r.initializePods(ctx, instance, logger); err != nil {
		return requeueOrNot, err
	}

	// Make sure the SolrCloud status is up-to-date with the state of the cluster
	var outOfDatePods util.OutOfDatePodSegmentation
	var availableUpdatedPodCount int
	outOfDatePods, availableUpdatedPodCount, err = createCloudStatus(instance, &newStatus, statefulSet.Status, podSelector, podList)
	if err != nil {
		return requeueOrNot, err
	}

	// We only want to do one cluster operation at a time, so we use a lock to ensure that.
	// Update or Scale, one-at-a-time. We do not want to do both.
	hasReadyPod := newStatus.ReadyReplicas > 0
	var retryLaterDuration time.Duration
	if clusterOpLock, hasAnn := statefulSet.Annotations[util.ClusterOpsLockAnnotation]; hasAnn {
		clusterOpMetadata := statefulSet.Annotations[util.ClusterOpsMetadataAnnotation]
		switch clusterOpLock {
		case util.UpdateLock:
			retryLaterDuration, err = handleManagedCloudRollingUpdate(ctx, r, instance, statefulSet, outOfDatePods, hasReadyPod, availableUpdatedPodCount, logger)
		case util.ScaleDownLock:
			retryLaterDuration, err = handleManagedCloudScaleDown(ctx, r, instance, statefulSet, clusterOpMetadata, podList, logger)
		case util.ScaleUpLock:
			retryLaterDuration, err = handleManagedCloudScaleUp(ctx, r, instance, statefulSet, clusterOpMetadata, logger)
		default:
			// This shouldn't happen, but we don't want to be stuck if it does.
			// Just remove the cluster Op, because the solr operator version running does not support it.
			err = clearClusterOp(ctx, r, statefulSet, "clusterOp not supported", logger)
		}
	} else {
		lockAcquired := false
		// Start cluster operations if needed.
		// The operations will be actually run in future reconcile loops, but a clusterOpLock will be acquired here.
		// And that lock will tell future reconcile loops that the operation needs to be done.
		// If a non-managed scale needs to take place, this method will update the StatefulSet without starting
		// a "locked" cluster operation
		lockAcquired, retryLaterDuration, err = determineRollingUpdateClusterOpLockIfNecessary(ctx, r, instance, statefulSet, outOfDatePods, logger)

		// Only consider a scale operation when no rolling-update lock was acquired above,
		// so that at most one cluster operation is started per reconcile.
		if !lockAcquired {
			lockAcquired, retryLaterDuration, err = determineScaleClusterOpLockIfNecessary(ctx, r, instance, statefulSet, podList, logger)
		}
		// After a lock is acquired, the reconcile will be started again because the StatefulSet is being watched
	}
	// A failed cluster operation with no explicit retry delay is retried after a short default.
	if err != nil && retryLaterDuration == 0 {
		retryLaterDuration = time.Second * 5
	}
	if retryLaterDuration > 0 {
		updateRequeueAfter(&requeueOrNot, retryLaterDuration)
	}
	if err != nil {
		return requeueOrNot, err
	}

	// Upsert or delete solrcloud-wide PodDisruptionBudget(s) based on 'Enabled' flag.
	pdb := util.GeneratePodDisruptionBudget(instance, statefulSet.Spec.Selector.MatchLabels)
	if instance.Spec.Availability.PodDisruptionBudget.Enabled != nil && *instance.Spec.Availability.PodDisruptionBudget.Enabled {
		// Check if the PodDisruptionBudget already exists
		pdbLogger := logger.WithValues("podDisruptionBudget", pdb.Name)
		foundPDB := &policyv1.PodDisruptionBudget{}
		err = r.Get(ctx, types.NamespacedName{Name: pdb.Name, Namespace: pdb.Namespace}, foundPDB)
		if err != nil && errors.IsNotFound(err) {
			pdbLogger.Info("Creating PodDisruptionBudget")
			if err = controllerutil.SetControllerReference(instance, pdb, r.Scheme); err == nil {
				err = r.Create(ctx, pdb)
			}
		} else if err == nil {
			var needsUpdate bool
			needsUpdate, err = util.OvertakeControllerRef(instance, foundPDB, r.Scheme)
			needsUpdate = util.CopyPodDisruptionBudgetFields(pdb, foundPDB, pdbLogger) || needsUpdate

			// Update the found PodDisruptionBudget and write the result back if there are any changes
			if needsUpdate && err == nil {
				pdbLogger.Info("Updating PodDisruptionBudget")
				err = r.Update(ctx, foundPDB)
			}
		}
		if err != nil {
			return requeueOrNot, err
		}
	} else {
		// PDB is disabled, make sure that we delete any previously created pdb that might exist.
		// The delete error is kept in its own variable so that an expected NotFound result
		// cannot leak into the value returned at the end of the reconcile.
		if deleteErr := r.Client.Delete(ctx, pdb); deleteErr != nil && !errors.IsNotFound(deleteErr) {
			return requeueOrNot, deleteErr
		}
	}

	// Only write the status when it actually changed, patching against a copy of the previous state.
	if !reflect.DeepEqual(instance.Status, newStatus) {
		logger.Info("Updating SolrCloud Status", "status", newStatus)
		oldInstance := instance.DeepCopy()
		instance.Status = newStatus
		err = r.Status().Patch(ctx, instance, client.MergeFrom(oldInstance))
		if err != nil {
			return requeueOrNot, err
		}
	}

	return requeueOrNot, nil
}