in pkg/controllers/nodeclaim/garbagecollection/instance_garbagecollection.go [57:94]
func (c *VirtualMachine) Reconcile(ctx context.Context) (reconcile.Result, error) {
ctx = injection.WithControllerName(ctx, "instance.garbagecollection")
// We LIST VMs on the CloudProvider BEFORE we grab NodeClaims/Nodes on the cluster so that we make sure that, if
// LISTing instances takes a long time, our information is more updated by the time we get to nodeclaim and Node LIST
// This works since our CloudProvider instances are deleted based on whether the NodeClaim exists or not, not vice-versa
retrieved, err := c.cloudProvider.List(ctx)
if err != nil {
return reconcile.Result{}, fmt.Errorf("listing cloudprovider VMs, %w", err)
}
managedRetrieved := lo.Filter(retrieved, func(nc *karpv1.NodeClaim, _ int) bool {
return nc.DeletionTimestamp.IsZero()
})
nodeClaimList := &karpv1.NodeClaimList{}
if err = c.kubeClient.List(ctx, nodeClaimList); err != nil {
return reconcile.Result{}, err
}
nodeList := &v1.NodeList{}
if err := c.kubeClient.List(ctx, nodeList); err != nil {
return reconcile.Result{}, err
}
resolvedProviderIDs := sets.New[string](lo.FilterMap(nodeClaimList.Items, func(n karpv1.NodeClaim, _ int) (string, bool) {
return n.Status.ProviderID, n.Status.ProviderID != ""
})...)
errs := make([]error, len(retrieved))
workqueue.ParallelizeUntil(ctx, 100, len(managedRetrieved), func(i int) {
if !resolvedProviderIDs.Has(managedRetrieved[i].Status.ProviderID) &&
time.Since(managedRetrieved[i].CreationTimestamp.Time) > time.Minute*5 {
errs[i] = c.garbageCollect(ctx, managedRetrieved[i], nodeList)
}
})
if err = multierr.Combine(errs...); err != nil {
return reconcile.Result{}, err
}
c.successfulCount++
return reconcile.Result{RequeueAfter: lo.Ternary(c.successfulCount <= 20, time.Second*10, time.Minute*2)}, nil
}