in cluster-autoscaler/core/scaleup/orchestrator/orchestrator.go [85:283]
func (o *ScaleUpOrchestrator) ScaleUp(
unschedulablePods []*apiv1.Pod,
nodes []*apiv1.Node,
daemonSets []*appsv1.DaemonSet,
nodeInfos map[string]*framework.NodeInfo,
allOrNothing bool, // Either request enough capacity for all unschedulablePods, or don't request it at all.
) (*status.ScaleUpStatus, errors.AutoscalerError) {
if !o.initialized {
return status.UpdateScaleUpError(&status.ScaleUpStatus{}, errors.NewAutoscalerError(errors.InternalError, "ScaleUpOrchestrator is not initialized"))
}
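// Log each unschedulable pod up to the logging quota, then summarize how many more were not logged.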
loggingQuota := klogx.PodsLoggingQuota()
for _, pod := range unschedulablePods {
klogx.V(1).UpTo(loggingQuota).Infof("Pod %s/%s is unschedulable", pod.Namespace, pod.Name)
}
klogx.V(1).Over(loggingQuota).Infof("%v other pods are also unschedulable", -loggingQuota.Left())
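// Group unschedulable pods into equivalence groups so scheduling simulations run once per group rather than once per pod.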
buildPodEquivalenceGroupsStart := time.Now()
podEquivalenceGroups := equivalence.BuildPodGroups(unschedulablePods)
metrics.UpdateDurationFromStart(metrics.BuildPodEquivalenceGroups, buildPodEquivalenceGroupsStart)
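// Collect the upcoming nodes (already requested but not yet registered) so they count towards cluster size limits.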
upcomingNodes, aErr := o.UpcomingNodes(nodeInfos)
if aErr != nil {
return status.UpdateScaleUpError(&status.ScaleUpStatus{}, aErr.AddPrefix("could not get upcoming nodes: "))
}
klog.V(4).Infof("Upcoming %d nodes", len(upcomingNodes))
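// Fetch the candidate node groups and let the NodeGroupListProcessor adjust the list and node infos (e.g. for node group auto-provisioning).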
nodeGroups := o.autoscalingContext.CloudProvider.NodeGroups()
if o.processors != nil && o.processors.NodeGroupListProcessor != nil {
var err error
nodeGroups, nodeInfos, err = o.processors.NodeGroupListProcessor.Process(o.autoscalingContext, nodeGroups, nodeInfos, unschedulablePods)
if err != nil {
return status.UpdateScaleUpError(&status.ScaleUpStatus{}, errors.ToAutoscalerError(errors.InternalError, err))
}
}
// Initialise binpacking limiter.
o.processors.BinpackingLimiter.InitBinpacking(o.autoscalingContext, nodeGroups)
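// Compute how much headroom remains under the cluster-wide resource limits (e.g. cores and memory).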
resourcesLeft, aErr := o.resourceManager.ResourcesLeft(o.autoscalingContext, nodeInfos, nodes)
if aErr != nil {
return status.UpdateScaleUpError(&status.ScaleUpStatus{}, aErr.AddPrefix("could not compute total resources: "))
}
now := time.Now()
// Filter out node groups that are not valid scale-up candidates (e.g. already at their size or resource limits).
validNodeGroups, skippedNodeGroups := o.filterValidScaleUpNodeGroups(nodeGroups, nodeInfos, resourcesLeft, len(nodes)+len(upcomingNodes), now)
// Mark skipped node groups as processed.
for nodegroupID := range skippedNodeGroups {
o.processors.BinpackingLimiter.MarkProcessed(o.autoscalingContext, nodegroupID)
}
// Calculate expansion options for each valid node group.
schedulablePodGroups := map[string][]estimator.PodEquivalenceGroup{}
var options []expander.Option
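// First, work out which pod equivalence groups could schedule on each valid node group.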
for _, nodeGroup := range validNodeGroups {
schedulablePodGroups[nodeGroup.Id()] = o.SchedulablePodGroups(podEquivalenceGroups, nodeGroup, nodeInfos[nodeGroup.Id()])
}
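// Then estimate the required node count per group and collect expander options, stopping early if the binpacking limiter says so.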
for _, nodeGroup := range validNodeGroups {
option := o.ComputeExpansionOption(nodeGroup, schedulablePodGroups, nodeInfos, len(nodes)+len(upcomingNodes), now, allOrNothing)
o.processors.BinpackingLimiter.MarkProcessed(o.autoscalingContext, nodeGroup.Id())
if len(option.Pods) == 0 || option.NodeCount == 0 {
klog.V(4).Infof("No pod can fit to %s", nodeGroup.Id())
} else if allOrNothing && len(option.Pods) < len(unschedulablePods) {
klog.V(4).Infof("Some pods can't fit to %s, giving up due to all-or-nothing scale-up strategy", nodeGroup.Id())
} else {
options = append(options, option)
}
if o.processors.BinpackingLimiter.StopBinpacking(o.autoscalingContext, options) {
break
}
}
// Finalize binpacking limiter.
o.processors.BinpackingLimiter.FinalizeBinpacking(o.autoscalingContext, options)
if len(options) == 0 {
klog.V(1).Info("No expansion options")
return &status.ScaleUpStatus{
Result: status.ScaleUpNoOptionsAvailable,
PodsRemainUnschedulable: GetRemainingPods(podEquivalenceGroups, skippedNodeGroups),
ConsideredNodeGroups: nodeGroups,
}, nil
}
// Pick the best expansion option according to the configured expander strategy.
bestOption := o.autoscalingContext.ExpanderStrategy.BestOption(options, nodeInfos)
if bestOption == nil || bestOption.NodeCount <= 0 {
return &status.ScaleUpStatus{
Result: status.ScaleUpNoOptionsAvailable,
PodsRemainUnschedulable: GetRemainingPods(podEquivalenceGroups, skippedNodeGroups),
ConsideredNodeGroups: nodeGroups,
}, nil
}
klog.V(1).Infof("Best option to resize: %s", bestOption.NodeGroup.Id())
if len(bestOption.Debug) > 0 {
klog.V(1).Info(bestOption.Debug)
}
klog.V(1).Infof("Estimated %d nodes needed in %s", bestOption.NodeCount, bestOption.NodeGroup.Id())
// Cap the new node count so the cluster does not exceed the maximum supported number of nodes.
newNodes, aErr := o.GetCappedNewNodeCount(bestOption.NodeCount, len(nodes)+len(upcomingNodes))
if aErr != nil {
return status.UpdateScaleUpError(&status.ScaleUpStatus{PodsTriggeredScaleUp: bestOption.Pods}, aErr)
}
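// Further cap the node count so the scale-up stays within the remaining cluster-wide resource limits.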
newNodes, aErr = o.applyLimits(newNodes, resourcesLeft, bestOption.NodeGroup, nodeInfos)
if aErr != nil {
return status.UpdateScaleUpError(
&status.ScaleUpStatus{PodsTriggeredScaleUp: bestOption.Pods},
aErr)
}
if newNodes < bestOption.NodeCount {
klog.V(1).Infof("Only %d nodes can be added to %s due to cluster-wide limits", newNodes, bestOption.NodeGroup.Id())
if allOrNothing {
// Can't execute a scale-up that would accommodate all pods, so nothing is considered schedulable.
klog.V(1).Info("Not attempting scale-up due to all-or-nothing strategy: not all pods would be accommodated")
markedEquivalenceGroups := markAllGroupsAsUnschedulable(podEquivalenceGroups, AllOrNothingReason)
return buildNoOptionsAvailableStatus(markedEquivalenceGroups, skippedNodeGroups, nodeGroups), nil
}
}
// If necessary, create the node group. This is no longer a simulation: an empty node group will be created by the cloud provider if supported.
createNodeGroupResults := make([]nodegroups.CreateNodeGroupResult, 0)
if !bestOption.NodeGroup.Exist() && !o.processors.AsyncNodeGroupStateChecker.IsUpcoming(bestOption.NodeGroup) {
if allOrNothing && bestOption.NodeGroup.MaxSize() < newNodes {
klog.V(1).Infof("Can only create a new node group with max %d nodes, need %d nodes", bestOption.NodeGroup.MaxSize(), newNodes)
// Can't execute a scale-up that would accommodate all pods, so nothing is considered schedulable.
klog.V(1).Info("Not attempting scale-up due to all-or-nothing strategy: not all pods would be accommodated")
markedEquivalenceGroups := markAllGroupsAsUnschedulable(podEquivalenceGroups, AllOrNothingReason)
return buildNoOptionsAvailableStatus(markedEquivalenceGroups, skippedNodeGroups, nodeGroups), nil
}
var scaleUpStatus *status.ScaleUpStatus
oldId := bestOption.NodeGroup.Id()
if o.autoscalingContext.AsyncNodeGroupsEnabled {
initializer := NewAsyncNodeGroupInitializer(bestOption, nodeInfos[oldId], o.scaleUpExecutor, o.taintConfig, daemonSets, o.processors.ScaleUpStatusProcessor, o.autoscalingContext, allOrNothing)
createNodeGroupResults, scaleUpStatus, aErr = o.CreateNodeGroupAsync(bestOption, nodeInfos, schedulablePodGroups, podEquivalenceGroups, daemonSets, initializer)
} else {
createNodeGroupResults, scaleUpStatus, aErr = o.CreateNodeGroup(bestOption, nodeInfos, schedulablePodGroups, podEquivalenceGroups, daemonSets)
}
if aErr != nil {
return scaleUpStatus, aErr
}
}
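// Split the requested scale-up across similar node groups when balancing applies.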
scaleUpInfos, aErr := o.balanceScaleUps(now, bestOption.NodeGroup, newNodes, nodeInfos, schedulablePodGroups)
if aErr != nil {
return status.UpdateScaleUpError(
&status.ScaleUpStatus{CreateNodeGroupResults: createNodeGroupResults, PodsTriggeredScaleUp: bestOption.Pods},
aErr)
}
// Last check before scale-up. Node group capacity (limited by both max size and current size) is only checked when balancing.
totalCapacity := 0
for _, sui := range scaleUpInfos {
totalCapacity += sui.NewSize - sui.CurrentSize
}
if totalCapacity < newNodes {
klog.V(1).Infof("Can only add %d nodes due to node group limits, need %d nodes", totalCapacity, newNodes)
if allOrNothing {
// Can't execute a scale-up that would accommodate all pods, so nothing is considered schedulable.
klog.V(1).Info("Not attempting scale-up due to all-or-nothing strategy: not all pods would be accommodated")
markedEquivalenceGroups := markAllGroupsAsUnschedulable(podEquivalenceGroups, AllOrNothingReason)
return buildNoOptionsAvailableStatus(markedEquivalenceGroups, skippedNodeGroups, nodeGroups), nil
}
}
// Execute the scale-up.
klog.V(1).Infof("Final scale-up plan: %v", scaleUpInfos)
aErr, failedNodeGroups := o.scaleUpExecutor.ExecuteScaleUps(scaleUpInfos, nodeInfos, now, allOrNothing)
if aErr != nil {
return status.UpdateScaleUpError(
&status.ScaleUpStatus{
CreateNodeGroupResults: createNodeGroupResults,
FailedResizeNodeGroups: failedNodeGroups,
PodsTriggeredScaleUp: bestOption.Pods,
},
aErr,
)
}
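// Refresh the cluster state registry so the requested scale-up is reflected in subsequent loop iterations.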
o.clusterStateRegistry.Recalculate()
return &status.ScaleUpStatus{
Result: status.ScaleUpSuccessful,
ScaleUpInfos: scaleUpInfos,
PodsRemainUnschedulable: GetRemainingPods(podEquivalenceGroups, skippedNodeGroups),
ConsideredNodeGroups: nodeGroups,
CreateNodeGroupResults: createNodeGroupResults,
PodsTriggeredScaleUp: bestOption.Pods,
PodsAwaitEvaluation: GetPodsAwaitingEvaluation(podEquivalenceGroups, bestOption.NodeGroup.Id()),
}, nil
}