func()

in npm/pkg/dataplane/policies/chain-management_linux.go [298:442]


// cleanupOtherIptables removes NPM's jump rules and Azure chains from the iptables
// variant that is NOT the one selected in util.Iptables when the call starts
// (legacy when starting on nft, nft when starting on legacy).
//
// For the duration of the call the util package is switched to the other variant; a
// deferred function switches it back on every return path.
//
// An error is returned only when:
//   - the current Azure chains cannot be listed, or
//   - the bulk flush failed, neither jump rule was deleted, and the individual flush of
//     the AZURE-NPM chain failed as well.
//
// Every other flush/delete failure is reported through metrics and the function still
// returns nil.
func (pMgr *PolicyManager) cleanupOtherIptables() error {
	startedOnNft := util.Iptables == util.IptablesNft
	if !startedOnNft {
		klog.Info("detected legacy iptables. cleaning up nft iptables")
		util.SetIptablesToNft()
	} else {
		klog.Info("detected nft iptables. cleaning up legacy iptables")
		util.SetIptablesToLegacy()
	}

	// whatever happens below, go back to the variant we started with
	defer func() {
		if !startedOnNft {
			klog.Info("cleaned up nft tables")
			util.SetIptablesToLegacy()
			return
		}
		klog.Info("cleaned up legacy iptables")
		util.SetIptablesToNft()
	}()

	// remembers whether at least one of the two jump rules was removed
	jumpRuleDeleted := false

	// 1.1. delete the deprecated jump to AZURE-NPM
	exitCode, runErr := pMgr.ignoreErrorsAndRunIPTablesCommand(removeDeprecatedJumpIgnoredErrors, util.IptablesDeletionFlag, deprecatedJumpFromForwardToAzureChainArgs...)
	switch {
	case exitCode == 0:
		klog.Infof("[cleanup] deleted deprecated jump rule from FORWARD chain to AZURE-NPM chain")
		jumpRuleDeleted = true
	case runErr != nil:
		metrics.SendErrorLogAndMetric(util.IptmID,
			"[cleanup] failed to delete deprecated jump rule from FORWARD chain to AZURE-NPM chain for unexpected reason with exit code %d and error: %s",
			exitCode, runErr.Error())
	}

	// 1.2. delete the jump to AZURE-NPM
	exitCode, runErr = pMgr.ignoreErrorsAndRunIPTablesCommand(removeDeprecatedJumpIgnoredErrors, util.IptablesDeletionFlag, jumpFromForwardToAzureChainArgs...)
	switch {
	case exitCode == 0:
		jumpRuleDeleted = true
		klog.Infof("[cleanup] deleted jump rule from FORWARD chain to AZURE-NPM chain")
	case runErr != nil:
		metrics.SendErrorLogAndMetric(util.IptmID,
			"[cleanup] failed to delete jump rule from FORWARD chain to AZURE-NPM chain for unexpected reason with exit code %d and error: %s",
			exitCode, runErr.Error())
	}

	// 2. get current chains
	existing, listErr := ioutil.AllCurrentAzureChains(pMgr.ioShim.Exec, util.IptablesDefaultWaitTime)
	if listErr != nil {
		return npmerrors.SimpleErrorWrapper("[cleanup] failed to get current chains for bootup", listErr)
	}

	total := len(existing)
	if total == 0 {
		klog.Info("no chains to cleanup")
		return nil
	}

	klog.Infof("[cleanup] %d chains to clean up", total)

	// chainFailure folds one more failure description into the error accumulated so far:
	// the first failure becomes a simple error, later ones wrap the previous error.
	chainFailure := func(previous error, description string) error {
		if previous == nil {
			return npmerrors.SimpleError(description)
		}
		return npmerrors.SimpleErrorWrapper(description+" and had previous error", previous)
	}

	// 3.1. try to flush all chains at once
	// The AZURE-NPM chain (when present) goes first; unit tests need a deterministic
	// position for it when chains are destroyed. The remaining chains follow in map order.
	ordered := make([]string, 0, total)
	_, hasAzureChain := existing[util.IptablesAzureChain]
	if hasAzureChain {
		ordered = append(ordered, util.IptablesAzureChain)
	}
	for name := range existing {
		if name != util.IptablesAzureChain {
			ordered = append(ordered, name)
		}
	}

	if restoreErr := restore(pMgr.creatorForCleanup(ordered)); restoreErr != nil {
		const flushAllFailedFormat = "[cleanup] failed to flush all chains with error: %s"
		klog.Infof(flushAllFailedFormat, restoreErr.Error())
		metrics.SendErrorLogAndMetric(util.IptmID, flushAllFailedFormat, restoreErr.Error())

		// 3.2. if we failed to flush all chains, then try to flush and delete them one by one
		var flushFailures error
		if hasAzureChain {
			_, azureFlushErr := pMgr.runIPTablesCommand(util.IptablesFlushFlag, util.IptablesAzureChain)
			if azureFlushErr != nil && !jumpRuleDeleted {
				// fixes #3088
				// No jump rule was deleted and the AZURE-NPM chain could not be flushed, so a jump
				// into AZURE-NPM may still exist and its rules could still allow or drop packets.
				// Cleanup of the other iptables failed and correct packet processing is not
				// guaranteed, so the caller must crash and retry.
				return npmerrors.SimpleErrorWrapper("[cleanup] must crash and retry. failed to delete jump rule and flush AZURE-NPM chain with error", azureFlushErr)
			}
			// nil on success; otherwise this is the first recorded flush failure
			flushFailures = azureFlushErr
		}

		for name := range existing {
			if name == util.IptablesAzureChain {
				// handled just above
				continue
			}

			flushCode, flushErr := pMgr.runIPTablesCommand(util.IptablesFlushFlag, name)
			if flushErr == nil || flushCode == doesNotExistErrorCode {
				continue
			}

			// NOTE: a chain that fails to flush or delete here is never cleaned up later.
			// This is zero-day behavior since NPM supported nft (the chain used to be marked stale, but that would not have worked as expected).
			// NPM has no retry mechanism for flushing/deleting a chain of the other iptables version (apart from the AZURE-NPM chain handled above).
			flushFailures = chainFailure(flushFailures, fmt.Sprintf("failed to flush chain %s with err [%v]", name, flushErr))
		}

		if flushFailures != nil {
			metrics.SendErrorLogAndMetric(util.IptmID,
				"[cleanup] benign failure to flush chains with error: %s",
				flushFailures.Error())
		}
	}

	// 4. delete all chains
	var destroyFailures error
	for i := range ordered {
		destroyCode, destroyErr := pMgr.runIPTablesCommand(util.IptablesDestroyFlag, ordered[i])
		if destroyErr == nil || destroyCode == doesNotExistErrorCode {
			continue
		}

		// NOTE: a chain that fails to flush or delete here is never cleaned up later.
		// This is zero-day behavior since NPM supported nft (the chain used to be marked stale, but that would not have worked as expected).
		// NPM has no retry mechanism for flushing/deleting a chain of the other iptables version (apart from the AZURE-NPM chain handled above).
		destroyFailures = chainFailure(destroyFailures, fmt.Sprintf("failed to delete chain %s with err [%v]", ordered[i], destroyErr))
	}

	if destroyFailures != nil {
		metrics.SendErrorLogAndMetric(util.IptmID,
			"[cleanup] benign failure to delete chains with error: %s",
			destroyFailures.Error())
	}

	return nil
}