in pkg/neg/manager.go [480:588]
// garbageCollectNEGWithCRD deletes NEGs, and then their ServiceNetworkEndpointGroup CRs, for every
// CR returned by svcNegLister whose name is not referenced by any port info in svcPortMap.
//
// For each deletion candidate the NEGs recorded in the CR status are deleted first. When the status
// holds no references, or a reference's self link cannot be parsed, a NEG named after the CR is
// deleted in every zone returned by the zone getter instead. The CR itself is deleted only when
// every NEG deletion that had to be attempted succeeded and the NEG is still absent from
// svcPortMap.
//
// Errors are collected rather than returned immediately, so a failure on one candidate does not
// stop processing of the others. The collected errors are returned as a single aggregate.
func (manager *syncerManager) garbageCollectNEGWithCRD() error {
	negCRs := manager.svcNegLister.List()
	deletionCandidates := make(map[string]*negv1beta1.ServiceNetworkEndpointGroup, len(negCRs))
	for _, obj := range negCRs {
		neg := obj.(*negv1beta1.ServiceNetworkEndpointGroup)
		deletionCandidates[neg.Name] = neg
	}

	func() {
		manager.mu.Lock()
		defer manager.mu.Unlock()
		for _, portInfoMap := range manager.svcPortMap {
			for _, portInfo := range portInfoMap {
				// Manager svcPortMap replicates the desired state of services, so svcPortMap is the source of truth
				// and determining factor to find deletion candidates. In the best case, neg cr will have a deletion
				// timestamp, the neg config will not exist in the svcPortMap, and both CR and neg will be deleted.
				// In the situation a neg config is in the svcPortMap but the CR has a deletion timestamp, then
				// neither the neg nor the CR will be deleted. In the situation a neg config is not in the svcPortMap,
				// but the CR does not have a deletion timestamp, both CR and neg will be deleted.
				//
				// delete is a no-op for keys that are not present, so no existence check is needed.
				delete(deletionCandidates, portInfo.NegName)
			}
		}
	}()

	// This section includes a potential race condition between deleting a neg here and a user adding the
	// neg annotation. The worst outcome of the race condition is that the neg is deleted in the end even
	// though the user actually specified a neg. This would be resolved (sync neg) when the next endpoint
	// update or resync arrives.
	// TODO: avoid race condition here
	var errList []error

	// Deletion candidate NEGs should be deleted from all zones, even ones that currently don't have any Ready nodes.
	zones, err := manager.zoneGetter.ListZones(utils.AllNodesPredicate)
	// zonesListed records whether the zone list can be trusted for zone-based cleanup below.
	zonesListed := err == nil
	if !zonesListed {
		errList = append(errList, fmt.Errorf("failed to get zones during garbage collection: %w", err))
	}

	// deleteNegOrReportErr will attempt to delete the specified NEG resource in the cloud. If an error
	// occurs, it will report an error as an event on the given CR and record it in errList. If an error
	// does occur, false will be returned to indicate that the CR should not be deleted.
	deleteNegOrReportErr := func(name, zone string, cr *negv1beta1.ServiceNetworkEndpointGroup) bool {
		expectedDesc := &utils.NegDescription{
			ClusterUID:  string(manager.kubeSystemUID),
			Namespace:   cr.Namespace,
			ServiceName: cr.GetLabels()[negtypes.NegCRServiceNameKey],
			Port:        cr.GetLabels()[negtypes.NegCRServicePortKey],
		}
		if err := manager.ensureDeleteNetworkEndpointGroup(name, zone, expectedDesc); err != nil {
			err = fmt.Errorf("failed to delete NEG %s in %s: %w", name, zone, err)
			// The message is passed as an argument, not as the format string, so that any '%'
			// in the underlying error text is emitted verbatim.
			manager.recorder.Eventf(cr, v1.EventTypeWarning, negtypes.NegGCError, "%s", err.Error())
			errList = append(errList, err)
			// Error when deleting NEG and return false to indicate not to delete Neg CR
			return false
		}
		return true
	}

	for _, cr := range deletionCandidates {
		shouldDeleteNegCR := true
		deleteByZone := len(cr.Status.NetworkEndpointGroups) == 0
		klog.V(2).Infof("Deletion candidate %s/%s has %d NEG references", cr.Namespace, cr.Name, len(cr.Status.NetworkEndpointGroups))

		for _, negRef := range cr.Status.NetworkEndpointGroups {
			resourceID, err := cloud.ParseResourceURL(negRef.SelfLink)
			if err != nil {
				errList = append(errList, fmt.Errorf("failed to parse selflink for neg cr %s/%s: %w", cr.Namespace, cr.Name, err))
				// The referenced NEG cannot be located from its self link, so fall back to
				// deleting by CR name in every zone.
				deleteByZone = true
				continue
			}
			// Always attempt the deletion, even after an earlier failure, so that one failing
			// NEG does not prevent the remaining ones from being cleaned up.
			if !deleteNegOrReportErr(resourceID.Key.Name, resourceID.Key.Zone, cr) {
				shouldDeleteNegCR = false
			}
		}

		if deleteByZone {
			klog.V(2).Infof("Deletion candidate %s/%s has 0 NEG reference: %+v", cr.Namespace, cr.Name, cr)
			if !zonesListed {
				// Without a zone list no zone-based deletion can be attempted. Keep the CR so
				// that a later garbage collection run can retry instead of orphaning the NEGs.
				shouldDeleteNegCR = false
			}
			for _, zone := range zones {
				if !deleteNegOrReportErr(cr.Name, zone, cr) {
					shouldDeleteNegCR = false
				}
			}
		}

		if !shouldDeleteNegCR {
			continue
		}

		func() {
			manager.mu.Lock()
			defer manager.mu.Unlock()

			// Verify that the NEG is still not wanted before deleting the CR. Mitigates the possibility of the race
			// condition mentioned above
			svcKey := getServiceKey(cr.Namespace, cr.GetLabels()[negtypes.NegCRServiceNameKey])
			portInfoMap := manager.svcPortMap[svcKey]
			for _, portInfo := range portInfoMap {
				if portInfo.NegName == cr.Name {
					klog.V(2).Infof("NEG CR %s/%s is still desired, skipping deletion", cr.Namespace, cr.Name)
					return
				}
			}

			klog.V(2).Infof("Deleting NEG CR %s/%s", cr.Namespace, cr.Name)
			if err := deleteSvcNegCR(manager.svcNegClient, cr); err != nil {
				errList = append(errList, err)
			}
		}()
	}

	return utilerrors.NewAggregate(errList)
}