in cluster-autoscaler/core/scale_down.go [1290:1378]
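// drainNode evicts the given pods (and, best effort, the DaemonSet pods) from the
// node, then waits up to maxGracefulTerminationSec plus podEvictionHeadroom for
// them to actually disappear, returning a per-pod eviction result either way.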
func drainNode(node *apiv1.Node, pods []*apiv1.Pod, daemonSetPods []*apiv1.Pod, client kube_client.Interface, recorder kube_record.EventRecorder,
maxGracefulTerminationSec int, maxPodEvictionTime time.Duration, waitBetweenRetries time.Duration,
podEvictionHeadroom time.Duration) (evictionResults map[string]status.PodEvictionResult, err error) {
evictionResults = make(map[string]status.PodEvictionResult)
retryUntil := time.Now().Add(maxPodEvictionTime)
confirmations := make(chan status.PodEvictionResult, len(pods))
daemonSetConfirmations := make(chan status.PodEvictionResult, len(daemonSetPods))
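	// Evict regular pods in parallel; every pod starts out marked as timed out and is
	// overwritten once its goroutine reports a result on the confirmations channel.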
for _, pod := range pods {
evictionResults[pod.Name] = status.PodEvictionResult{Pod: pod, TimedOut: true, Err: nil}
go func(podToEvict *apiv1.Pod) {
confirmations <- evictPod(podToEvict, false, client, recorder, maxGracefulTerminationSec, retryUntil, waitBetweenRetries)
}(pod)
}
	// Perform eviction of DaemonSet pods. We don't want to raise an error if a DaemonSet pod wasn't evicted properly.
for _, daemonSetPod := range daemonSetPods {
go func(podToEvict *apiv1.Pod) {
daemonSetConfirmations <- evictPod(podToEvict, true, client, recorder, maxGracefulTerminationSec, retryUntil, waitBetweenRetries)
}(daemonSetPod)
}
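	// Wait for all eviction goroutines, allowing a small buffer past retryUntil before giving up.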
podsEvictionCounter := 0
for i := 0; i < len(pods)+len(daemonSetPods); i++ {
select {
case evictionResult := <-confirmations:
podsEvictionCounter++
evictionResults[evictionResult.Pod.Name] = evictionResult
if evictionResult.WasEvictionSuccessful() {
metrics.RegisterEvictions(1)
}
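		// DaemonSet pod evictions are best effort, so their results are drained but not recorded.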
case <-daemonSetConfirmations:
		case <-time.After(time.Until(retryUntil) + 5*time.Second):
if podsEvictionCounter < len(pods) {
// All pods initially had results with TimedOut set to true, so the ones that didn't receive an actual result are correctly marked as timed out.
return evictionResults, errors.NewAutoscalerError(errors.ApiCallError, "Failed to drain node %s/%s: timeout when waiting for creating evictions", node.Namespace, node.Name)
}
klog.Infof("Timeout when waiting for creating daemonSetPods eviction")
}
}
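	// Aggregate per-pod failures into a single error so the caller sees every pod that could not be evicted.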
evictionErrs := make([]error, 0)
for _, result := range evictionResults {
if !result.WasEvictionSuccessful() {
evictionErrs = append(evictionErrs, result.Err)
}
}
if len(evictionErrs) != 0 {
return evictionResults, errors.NewAutoscalerError(errors.ApiCallError, "Failed to drain node %s/%s, due to following errors: %v", node.Namespace, node.Name, evictionErrs)
}
// Evictions created successfully, wait maxGracefulTerminationSec + podEvictionHeadroom to see if pods really disappeared.
var allGone bool
	for start := time.Now(); time.Since(start) < time.Duration(maxGracefulTerminationSec)*time.Second+podEvictionHeadroom; time.Sleep(5 * time.Second) {
allGone = true
for _, pod := range pods {
			// The pod counts as gone once Get returns NotFound or the pod has been rescheduled onto a different node.
			podReturned, err := client.CoreV1().Pods(pod.Namespace).Get(ctx.TODO(), pod.Name, metav1.GetOptions{})
			if err == nil && (podReturned == nil || podReturned.Spec.NodeName == node.Name) {
klog.Errorf("Not deleted yet %s/%s", pod.Namespace, pod.Name)
allGone = false
break
}
if err != nil && !kube_errors.IsNotFound(err) {
klog.Errorf("Failed to check pod %s/%s: %v", pod.Namespace, pod.Name, err)
allGone = false
break
}
}
if allGone {
klog.V(1).Infof("All pods removed from %s", node.Name)
			// Every pod is gone, so there is nothing left to wait for.
return evictionResults, nil
}
}
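	// The wait loop timed out: take a final snapshot of each pod so the result map reflects which pods are still on the node.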
for _, pod := range pods {
podReturned, err := client.CoreV1().Pods(pod.Namespace).Get(ctx.TODO(), pod.Name, metav1.GetOptions{})
if err == nil && (podReturned == nil || podReturned.Spec.NodeName == node.Name) {
evictionResults[pod.Name] = status.PodEvictionResult{Pod: pod, TimedOut: true, Err: nil}
} else if err != nil && !kube_errors.IsNotFound(err) {
evictionResults[pod.Name] = status.PodEvictionResult{Pod: pod, TimedOut: true, Err: err}
} else {
evictionResults[pod.Name] = status.PodEvictionResult{Pod: pod, TimedOut: false, Err: nil}
}
}
return evictionResults, errors.NewAutoscalerError(errors.TransientError, "Failed to drain node %s/%s: pods remaining after timeout", node.Namespace, node.Name)
}