func handleManagedCloudRollingUpdate()

in controllers/solr_cluster_ops_util.go [351:426]


// handleManagedCloudRollingUpdate performs one pass of a managed rolling update of the
// pods behind statefulSet.
//
// The pass takes one of three paths:
//   - When the StatefulSet's configured replica count equals availableUpdatedPodCount the
//     update is finished: operationComplete is true and, if the operation's metadata says
//     replicas were migrated and no scale-up is pending, nextClusterOp requests a
//     BalanceReplicasLock operation.
//   - When outOfDatePods is empty there is nothing to delete; all results are zero values.
//   - Otherwise the out-of-date pods are handed to DeletePodForUpdate: the not-started pods
//     always, the pods scheduled for deletion only when util.GetNodeReplicaState did not ask
//     for a retry, and the pods chosen by util.DeterminePodsSafeToUpdate only when it did not
//     ask for a retry either.
//
// Results:
//   - operationComplete: true only on the first path above.
//   - requestInProgress: true if any DeletePodForUpdate call reported an in-progress request,
//     or if fetching the node replica state returned an error.
//   - retryLaterDuration: the smallest positive duration reported by DeletePodForUpdate, or
//     10 seconds when a retry was requested and no pod reported a duration.
//   - nextClusterOp: the follow-up operation, or nil.
//   - err: the metadata decoding error (returned together with operationComplete == true),
//     the node replica state error, or the error of the last DeletePodForUpdate call that failed.
func handleManagedCloudRollingUpdate(ctx context.Context, r *SolrCloudReconciler, instance *solrv1beta1.SolrCloud, statefulSet *appsv1.StatefulSet, clusterOp *SolrClusterOp, outOfDatePods util.OutOfDatePodSegmentation, hasReadyPod bool, availableUpdatedPodCount int, logger logr.Logger) (operationComplete bool, requestInProgress bool, retryLaterDuration time.Duration, nextClusterOp *SolrClusterOp, err error) {
	// Manage the updating of out-of-spec pods, if the Managed UpdateStrategy has been specified.
	updateLogger := logger.WithName("ManagedUpdateSelector")

	// First check if all pods are up to date and ready. If so the rolling update is complete
	configuredPods := int(*statefulSet.Spec.Replicas)
	if configuredPods == availableUpdatedPodCount {
		// Decode into a struct value rather than through a pointer-to-pointer: encoding/json
		// sets a pointer target to nil when the document is the literal `null`, which would
		// make the field access below dereference nil.
		var updateMetadata RollingUpdateMetadata
		if clusterOp.Metadata != "" {
			// The error is logged and also left in err, so it is returned alongside
			// operationComplete == true; the completion decision itself is not affected.
			if err = json.Unmarshal([]byte(clusterOp.Metadata), &updateMetadata); err != nil {
				updateLogger.Error(err, "Could not unmarshal metadata for rolling update operation")
			}
		}
		// Only do a re-balancing for rolling restarts that migrated replicas
		// If a scale-up will occur afterwards, skip the re-balancing, because it will occur after the scale-up anyway
		if updateMetadata.RequiresReplicaMigration && *instance.Spec.Replicas <= *statefulSet.Spec.Replicas {
			nextClusterOp = &SolrClusterOp{
				Operation: BalanceReplicasLock,
				Metadata:  "RollingUpdateComplete",
			}
		}
		return true, false, 0, nextClusterOp, err
	}

	if outOfDatePods.IsEmpty() {
		// Just return and wait for the updated pods to come up healthy, these will call new reconciles, so there is nothing for us to do
		return false, false, 0, nil, nil
	}

	// The out of date pods that have not been started, should all be updated immediately.
	// There is no use "safely" updating pods which have not been started yet.
	podsToUpdate := append([]corev1.Pod{}, outOfDatePods.NotStarted...)
	for i := range outOfDatePods.NotStarted {
		updateLogger.Info("Pod killed for update.", "pod", outOfDatePods.NotStarted[i].Name, "reason", "The solr container in the pod has not yet started, thus it is safe to update.")
	}

	// An error from the state lookup ends this pass before any pod is deleted; the request is
	// reported as in progress together with the error.
	// A retryLater result does not end the pass: we won't kill pods that we need the cluster
	// state for, but we can kill the pods that are already not running.
	// This is important for scenarios where there is a bad pod config and nothing is running, but we need to do
	// a restart to get a working pod config.
	state, retryLater, apiError := util.GetNodeReplicaState(ctx, instance, statefulSet, hasReadyPod, logger)
	if apiError != nil {
		return false, true, 0, nil, apiError
	}
	if !retryLater {
		// If the cluster status has been successfully fetched, then add the pods scheduled for deletion
		// This requires the clusterState to be fetched successfully to ensure that we know if there
		// are replicas living on the pod
		podsToUpdate = append(podsToUpdate, outOfDatePods.ScheduledForDeletion...)

		// Pick which pods should be deleted for an update.
		var additionalPodsToUpdate []corev1.Pod
		additionalPodsToUpdate, retryLater =
			util.DeterminePodsSafeToUpdate(instance, configuredPods, outOfDatePods, state, availableUpdatedPodCount, updateLogger)
		// If a retry was requested, it's not safe to update pods that are running
		if !retryLater {
			podsToUpdate = append(podsToUpdate, additionalPodsToUpdate...)
		}
	}

	// Only actually delete a running pod if it has been evicted, or doesn't need eviction (persistent storage)
	// Iterate by index so each call receives the address of its own slice element instead of
	// the address of a (pre-Go 1.22 shared) loop variable, and no Pod is copied per iteration.
	for i := range podsToUpdate {
		pod := &podsToUpdate[i]
		podRetryDuration, podInProgress, podErr := DeletePodForUpdate(ctx, r, instance, pod, state.PodHasReplicas(instance, pod.Name), updateLogger)
		requestInProgress = requestInProgress || podInProgress

		// Use the retryLaterDuration of the pod that requires a retry the soonest (smallest duration > 0)
		if podRetryDuration > 0 && (retryLaterDuration == 0 || podRetryDuration < retryLaterDuration) {
			retryLaterDuration = podRetryDuration
		}
		// Keep going after a failure so the remaining pods are still processed; the error of
		// the last failing pod is the one reported.
		if podErr != nil {
			err = podErr
		}
	}
	if retryLater && retryLaterDuration == 0 {
		retryLaterDuration = time.Second * 10
	}
	return false, requestInProgress, retryLaterDuration, nil, err
}