in controllers/solr_cluster_ops_util.go [351:426]
func handleManagedCloudRollingUpdate(ctx context.Context, r *SolrCloudReconciler, instance *solrv1beta1.SolrCloud, statefulSet *appsv1.StatefulSet, clusterOp *SolrClusterOp, outOfDatePods util.OutOfDatePodSegmentation, hasReadyPod bool, availableUpdatedPodCount int, logger logr.Logger) (operationComplete bool, requestInProgress bool, retryLaterDuration time.Duration, nextClusterOp *SolrClusterOp, err error) {
// Manage the updating of out-of-spec pods, if the Managed UpdateStrategy has been specified.
updateLogger := logger.WithName("ManagedUpdateSelector")
// First check if all pods are up to date and ready. If so, the rolling update is complete.
configuredPods := int(*statefulSet.Spec.Replicas)
if configuredPods == availableUpdatedPodCount {
updateMetadata := &RollingUpdateMetadata{}
if clusterOp.Metadata != "" {
if err = json.Unmarshal([]byte(clusterOp.Metadata), &updateMetadata); err != nil {
updateLogger.Error(err, "Could not unmarshal metadata for rolling update operation")
}
}
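// Mark the operation as complete; the metadata only determines whether a follow-up re-balance is queued.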
operationComplete = true
// Only do a re-balancing of replicas for rolling restarts that migrated replicas.
// If a scale-up will occur afterwards, skip the re-balancing here, because one will run after the scale-up anyway.
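// A pending scale-up shows up here as the SolrCloud spec asking for more replicas than the StatefulSet currently has.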
if updateMetadata.RequiresReplicaMigration && *instance.Spec.Replicas <= *statefulSet.Spec.Replicas {
nextClusterOp = &SolrClusterOp{
Operation: BalanceReplicasLock,
Metadata: "RollingUpdateComplete",
}
}
return
} else if outOfDatePods.IsEmpty() {
// Just return and wait for the updated pods to come up healthy; they will trigger new reconciles, so there is nothing for us to do here.
return
} else {
// The out-of-date pods that have not been started should all be updated immediately.
// There is no benefit to "safely" updating pods that have not yet started.
podsToUpdate := append([]corev1.Pod{}, outOfDatePods.NotStarted...)
for _, pod := range outOfDatePods.NotStarted {
updateLogger.Info("Pod killed for update.", "pod", pod.Name, "reason", "The solr container in the pod has not yet started, thus it is safe to update.")
}
// Don't give up when the cluster state cannot be fetched; requeue later instead. Only a hard API error returns early.
// We won't kill pods that we need the cluster state for, but we can kill pods that are already not running.
// This is important for scenarios where there is a bad pod config and nothing is running, but we need to do
// a restart to get a working pod config.
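// The returned state is used below (state.PodHasReplicas) to check whether a pod still hosts replicas;
// retryLater means the cluster state could not be determined, and apiError is a hard failure.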
state, retryLater, apiError := util.GetNodeReplicaState(ctx, instance, statefulSet, hasReadyPod, logger)
if apiError != nil {
return false, true, 0, nil, apiError
} else if !retryLater {
// The cluster state has been fetched successfully, so the pods scheduled for deletion can be added as well.
// The clusterState is needed to know whether there are still replicas living on those pods.
podsToUpdate = append(podsToUpdate, outOfDatePods.ScheduledForDeletion...)
// Pick which pods should be deleted for an update.
var additionalPodsToUpdate []corev1.Pod
additionalPodsToUpdate, retryLater =
util.DeterminePodsSafeToUpdate(instance, int(*statefulSet.Spec.Replicas), outOfDatePods, state, availableUpdatedPodCount, updateLogger)
// If we do not have the clusterState, it's not safe to update pods that are running
if !retryLater {
podsToUpdate = append(podsToUpdate, additionalPodsToUpdate...)
}
}
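// At this point podsToUpdate holds the not-yet-started pods plus, when the cluster state is known,
// the pods scheduled for deletion and any additional pods deemed safe to update.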
// Only actually delete a running pod if it has been evicted or does not need eviction (persistent storage).
for _, pod := range podsToUpdate {
retryLaterDurationTemp, inProgTmp, errTemp := DeletePodForUpdate(ctx, r, instance, &pod, state.PodHasReplicas(instance, pod.Name), updateLogger)
requestInProgress = requestInProgress || inProgTmp
// Use the retryLaterDuration of the pod that requires a retry the soonest (smallest duration > 0)
if retryLaterDurationTemp > 0 && (retryLaterDurationTemp < retryLaterDuration || retryLaterDuration == 0) {
retryLaterDuration = retryLaterDurationTemp
}
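// Keep the last error seen, but continue attempting to delete the remaining pods.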
if errTemp != nil {
err = errTemp
}
}
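// If the cluster-state fetch or the safety check asked for a retry and no pod requested a sooner requeue,
// fall back to a 10 second requeue.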
if retryLater && retryLaterDuration == 0 {
retryLaterDuration = time.Second * 10
}
}
return
}
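For context, here is a minimal sketch of how the rolling-update metadata consumed above might be produced when the cluster operation is created. SolrClusterOp and RollingUpdateMetadata.RequiresReplicaMigration appear in the excerpt; the UpdateLock constant and the helper name sketchRollingUpdateOp are assumptions for illustration, not taken from this file, and the sketch assumes it lives in the same package with encoding/json already imported.

// Illustrative sketch only; not part of controllers/solr_cluster_ops_util.go.
func sketchRollingUpdateOp(requiresReplicaMigration bool) (*SolrClusterOp, error) {
	// Serialize the metadata that handleManagedCloudRollingUpdate unmarshals above.
	metaBytes, err := json.Marshal(RollingUpdateMetadata{RequiresReplicaMigration: requiresReplicaMigration})
	if err != nil {
		return nil, err
	}
	return &SolrClusterOp{
		Operation: UpdateLock, // assumed name of the rolling-update lock constant
		Metadata:  string(metaBytes),
	}, nil
}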