in internal/webhook/eviction.go [27:117]
func (e *EvictionHandler) Handle(ctx context.Context, req admission.Request) admission.Response {
	logger := log.FromContext(ctx)
	logger.Info("Received eviction request", "namespace", req.Namespace, "podName", req.Name)

	currentEviction := pdbautoscaler.Eviction{
		PodName:      req.Name,
		EvictionTime: metav1.Now(),
	}
	// Fetch the pod to get its labels.
	pod := &corev1.Pod{}
	err := e.Client.Get(ctx, types.NamespacedName{Namespace: req.Namespace, Name: req.Name}, pod)
	if err != nil {
		logger.Error(err, "Unable to fetch Pod")
		return admission.Errored(http.StatusInternalServerError, err)
	}
	podObj := pod.DeepCopy()
	// List all EvictionAutoScalers in the namespace. This runs on every eviction; is it
	// expensive, and are this list and the PDBs cached? (See the manager wiring sketch
	// after this handler.)
	EvictionAutoScalerList := &pdbautoscaler.EvictionAutoScalerList{}
	err = e.Client.List(ctx, EvictionAutoScalerList, &client.ListOptions{Namespace: req.Namespace})
	if err != nil {
		logger.Error(err, "Unable to list EvictionAutoScalers")
		return admission.Errored(http.StatusInternalServerError, err)
	}
	// Find the applicable EvictionAutoScaler.
	var applicableEvictionAutoScaler *pdbautoscaler.EvictionAutoScaler
	for _, EvictionAutoScaler := range EvictionAutoScalerList.Items {
		EvictionAutoScaler := EvictionAutoScaler

		// Fetch the associated PDB (it shares the EvictionAutoScaler's name).
		pdb := &policyv1.PodDisruptionBudget{}
		err := e.Client.Get(ctx, types.NamespacedName{Name: EvictionAutoScaler.Name, Namespace: EvictionAutoScaler.Namespace}, pdb)
		if err != nil {
			logger.Error(err, "Unable to fetch PDB", "pdbName", EvictionAutoScaler.Name)
			return admission.Errored(http.StatusInternalServerError, err)
		}

		// Check whether the PDB selector matches the evicted pod's labels.
		selector, err := metav1.LabelSelectorAsSelector(pdb.Spec.Selector)
		if err != nil {
			logger.Error(err, "Invalid PDB selector", "pdbName", EvictionAutoScaler.Name)
			continue
		}
		if selector.Matches(labels.Set(pod.Labels)) {
			applicableEvictionAutoScaler = &EvictionAutoScaler
			break
		}
	}
	if applicableEvictionAutoScaler == nil {
		logger.Info("No applicable EvictionAutoScaler found")
		return admission.Allowed("no applicable EvictionAutoScaler")
	}
	logger.Info("Found EvictionAutoScaler", "name", applicableEvictionAutoScaler.Name)
	updated := podutil.UpdatePodCondition(&podObj.Status, &v1.PodCondition{
		Type:    v1.DisruptionTarget,
		Status:  v1.ConditionTrue,
		Reason:  "EvictionAttempt",
		Message: "eviction attempt recorded by eviction webhook",
	})
	if updated {
		if err := e.Client.Status().Update(ctx, podObj); err != nil {
			logger.Error(err, "Unable to update Pod status")
			// Don't fail yet; we still want to try to update the EvictionAutoScaler.
		}
	}
	// We want to rate-limit mass evictions, but if we slow down too much we may miss the
	// last eviction and never scale down.
	//if currentEviction.EvictionTime.Time.Sub(applicableEvictionAutoScaler.Spec.LastEviction.EvictionTime.Time) < time.Second {
	//	return admission.Allowed("eviction allowed")
	//}
	applicableEvictionAutoScaler.Spec.LastEviction = currentEviction
	err = e.Client.Update(ctx, applicableEvictionAutoScaler)
	if err != nil {
		// Handle conflicts when many evictions happen in parallel? Or does it not matter if we
		// lose some? (See the RetryOnConflict sketch after this handler.)
		logger.Error(err, "Unable to update EvictionAutoScaler")
		return admission.Errored(http.StatusInternalServerError, err) // Is this a problem if the webhook doesn't ignore failures?
	}
logger.Info("Eviction logged successfully", "podName", req.Name, "evictionTime", currentEviction.EvictionTime)
return admission.Allowed("eviction allowed")
}
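
// Hypothetical sketch (not part of eviction.go): one answer to the caching question in
// Handle above. When the handler is registered through a controller-runtime manager and
// built with mgr.GetClient(), the Get/List calls in Handle are served from the manager's
// shared informer cache rather than hitting the API server on every eviction. The
// registration path "/evictions" and the exact EvictionHandler field set are assumptions;
// assumed imports: ctrl "sigs.k8s.io/controller-runtime" and
// "sigs.k8s.io/controller-runtime/pkg/webhook".
func SetupEvictionWebhookSketch(mgr ctrl.Manager) {
	mgr.GetWebhookServer().Register("/evictions", &webhook.Admission{
		Handler: &EvictionHandler{Client: mgr.GetClient()},
	})
}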
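
// Hypothetical sketch (not part of eviction.go): retrying the LastEviction update on
// conflict instead of returning 500, for the parallel-eviction question noted in Handle.
// updateLastEviction is an invented helper name; it uses retry.RetryOnConflict and
// retry.DefaultRetry from k8s.io/client-go/util/retry (assumed import) and re-reads the
// latest object before each attempt so a lost conflict does not drop the eviction record.
func updateLastEviction(ctx context.Context, c client.Client, key types.NamespacedName, eviction pdbautoscaler.Eviction) error {
	return retry.RetryOnConflict(retry.DefaultRetry, func() error {
		latest := &pdbautoscaler.EvictionAutoScaler{}
		if err := c.Get(ctx, key, latest); err != nil {
			return err
		}
		latest.Spec.LastEviction = eviction
		return c.Update(ctx, latest)
	})
}

// Handle could then call, for example:
// updateLastEviction(ctx, e.Client, client.ObjectKeyFromObject(applicableEvictionAutoScaler), currentEviction)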