cni/network/invoker_cns.go (437 lines of code) (raw):
package network
import (
"context"
"encoding/json"
"fmt"
"net"
"github.com/Azure/azure-container-networking/cni"
"github.com/Azure/azure-container-networking/cni/log"
"github.com/Azure/azure-container-networking/cni/util"
"github.com/Azure/azure-container-networking/cns"
cnscli "github.com/Azure/azure-container-networking/cns/client"
"github.com/Azure/azure-container-networking/cns/fsnotify"
"github.com/Azure/azure-container-networking/iptables"
"github.com/Azure/azure-container-networking/network"
"github.com/Azure/azure-container-networking/network/networkutils"
"github.com/Azure/azure-container-networking/network/policy"
cniSkel "github.com/containernetworking/cni/pkg/skel"
"github.com/pkg/errors"
"go.uber.org/zap"
"go.uber.org/zap/zapcore"
)
// expectedNumInterfacesWithDefaultRoutes is the number of interfaces in an add
// result that must carry default routes; Add returns errInvalidDefaultRouting
// for any other count.
const expectedNumInterfacesWithDefaultRoutes = 1
// Sentinel errors returned by the CNS IPAM invoker.
var (
	// errEmptyCNIArgs is returned when Add or Delete is called without CNI command args.
	errEmptyCNIArgs = errors.New("empty CNI cmd args not allowed")
	// errInvalidArgs is the base error for invalid values found in a CNS response.
	errInvalidArgs = errors.New("invalid arg(s)")
	// errInvalidDefaultRouting is returned by Add when the number of interfaces
	// with default routes differs from expectedNumInterfacesWithDefaultRoutes.
	errInvalidDefaultRouting = errors.New("add result requires exactly one interface with default routes")
	// errInvalidGatewayIP is returned by getRoutes for a route whose gateway cannot be parsed.
	errInvalidGatewayIP = errors.New("invalid gateway IP")
)

// String settings used by the invoker. They are variables, not constants, and
// keep that form here so any code that reassigns them continues to compile.
var (
	// overlayGatewayV6IP is the gateway assigned to an IPv6 pod address in
	// overlay mode when CNS does not supply a parseable gateway.
	overlayGatewayV6IP = "fe80::1234:5678:9abc"
	// watcherPath is the path passed to fsnotify.AddFile when a release call
	// fails with a CNS connection error.
	watcherPath = "/var/run/azure-vnet/deleteIDs"
)
// CNSIPAMInvoker is an IPAM invoker that requests and releases pod IP
// configurations through a CNS client.
type CNSIPAMInvoker struct {
	// podName and podNamespace identify the pod; Add and Delete marshal them
	// into the orchestrator context sent to CNS.
	podName, podNamespace string
	// cnsClient performs the request/release calls against CNS.
	cnsClient cnsclient
	// executionMode is the execution mode supplied to NewCNSInvoker.
	executionMode util.ExecutionMode
	// ipamMode is the IPAM mode supplied to NewCNSInvoker; Add uses it to
	// decide whether infra NIC results are configured in overlay mode.
	ipamMode util.IpamMode
}
// IPResultInfo is the flattened view of a single cns.PodIpInfo that Add builds
// before converting it into a network.InterfaceInfo.
type IPResultInfo struct {
	// podIPAddress is the IP address assigned to the pod.
	podIPAddress string
	// ncSubnetPrefix is the prefix length of the network container subnet.
	ncSubnetPrefix uint8
	// ncPrimaryIP and ncGatewayIPAddress are the primary IP and gateway of the
	// network container.
	ncPrimaryIP, ncGatewayIPAddress string
	// hostSubnet, hostPrimaryIP and hostGateway describe the host's primary interface.
	hostSubnet, hostPrimaryIP, hostGateway string
	// nicType is the NIC type reported by CNS; Add treats an empty value as the infra NIC.
	nicType cns.NICType
	// macAddress is the interface MAC address; it is parsed for frontend and backend NICs.
	macAddress string
	// skipDefaultRoutes is the CNS flag that excludes this interface from the
	// default-route count in Add.
	skipDefaultRoutes bool
	// routes are the routes CNS returned for this interface.
	routes []cns.Route
	// pnpID is the PnP ID required for backend NICs.
	pnpID string
	// endpointPolicies are copied onto the infra NIC interface info.
	endpointPolicies []policy.Policy
}
// MarshalLogObject writes the fields of an IPResultInfo to a zap object
// encoder so the value can be logged as a structured object. The pnpID and
// endpointPolicies fields are not emitted. It always returns nil.
func (info IPResultInfo) MarshalLogObject(enc zapcore.ObjectEncoder) error {
	enc.AddString("podIPAddress", info.podIPAddress)
	enc.AddUint8("ncSubnetPrefix", info.ncSubnetPrefix)

	// The remaining string-valued fields, in the order they appear in the log entry.
	for _, field := range [...]struct{ key, value string }{
		{"ncPrimaryIP", info.ncPrimaryIP},
		{"ncGatewayIPAddress", info.ncGatewayIPAddress},
		{"hostSubnet", info.hostSubnet},
		{"hostPrimaryIP", info.hostPrimaryIP},
		{"hostGateway", info.hostGateway},
		{"nicType", string(info.nicType)},
		{"macAddress", info.macAddress},
	} {
		enc.AddString(field.key, field.value)
	}

	enc.AddBool("skipDefaultRoutes", info.skipDefaultRoutes)
	enc.AddString("routes", fmt.Sprintf("%+v", info.routes))
	return nil
}
// NewCNSInvoker builds a CNSIPAMInvoker for the given pod name and namespace
// that talks to CNS through cnsClient and records the supplied execution and
// IPAM modes.
func NewCNSInvoker(podName, namespace string, cnsClient cnsclient, executionMode util.ExecutionMode, ipamMode util.IpamMode) *CNSIPAMInvoker {
	invoker := new(CNSIPAMInvoker)
	invoker.podName = podName
	invoker.podNamespace = namespace
	invoker.cnsClient = cnsClient
	invoker.executionMode = executionMode
	invoker.ipamMode = ipamMode
	return invoker
}
// Add requests IP configuration for the pod from CNS and converts every
// returned pod IP info into an entry of the resulting IPAMAddResult.
//
// It calls the RequestIPs API first and, only when CNS reports that API as
// unsupported, falls back to the single-IP RequestIPAddress API. Each pod IP
// info is then dispatched on its NIC type:
//   - frontend NICs are recorded as secondary interfaces,
//   - backend NICs are recorded with their MAC address and PnP ID,
//   - the infra NIC (an empty NIC type is treated as infra) is recorded with
//     its IP configs and routes; several infra infos (dual stack) accumulate
//     on the same interface entry.
//
// Unknown NIC types are logged and skipped. Exactly one interface in the
// result must carry default routes, otherwise errInvalidDefaultRouting is
// returned. On any error the returned IPAMAddResult is empty.
func (invoker *CNSIPAMInvoker) Add(addConfig IPAMAddConfig) (IPAMAddResult, error) {
	// Parse Pod arguments.
	podInfo := cns.KubernetesPodInfo{
		PodName:      invoker.podName,
		PodNamespace: invoker.podNamespace,
	}

	orchestratorContext, err := json.Marshal(podInfo)
	if err != nil {
		logger.Info(podInfo.PodName)
		// errors.Wrap does not interpret format verbs, so the message must not carry "%w".
		return IPAMAddResult{}, errors.Wrap(err, "Failed to marshal orchestrator context during add")
	}

	if addConfig.args == nil {
		return IPAMAddResult{}, errEmptyCNIArgs
	}

	endpointID := GetEndpointID(addConfig.args)
	ipconfigs := cns.IPConfigsRequest{
		OrchestratorContext: orchestratorContext,
		PodInterfaceID:      endpointID,
		InfraContainerID:    addConfig.args.ContainerID,
	}

	logger.Info("Requesting IP for pod using ipconfig",
		zap.Any("pod", podInfo),
		zap.Any("ipconfig", ipconfigs))
	response, err := invoker.cnsClient.RequestIPs(context.TODO(), ipconfigs)
	if err != nil {
		if !cnscli.IsUnsupportedAPI(err) {
			logger.Info("Failed to get IP address from CNS",
				zap.Any("response", response),
				zap.Error(err))
			return IPAMAddResult{}, errors.Wrap(err, "Failed to get IP address from CNS")
		}

		// If RequestIPs is not supported by CNS, use RequestIPAddress API
		logger.Error("RequestIPs not supported by CNS. Invoking RequestIPAddress API",
			zap.Any("infracontainerid", ipconfigs.InfraContainerID))
		ipconfig := cns.IPConfigRequest{
			OrchestratorContext: orchestratorContext,
			PodInterfaceID:      endpointID,
			InfraContainerID:    addConfig.args.ContainerID,
		}

		res, errRequestIP := invoker.cnsClient.RequestIPAddress(context.TODO(), ipconfig)
		if errRequestIP != nil {
			// if the old API fails as well then we just return the error
			logger.Error("Failed to request IP address from CNS using RequestIPAddress",
				zap.Any("infracontainerid", ipconfig.InfraContainerID),
				zap.Error(errRequestIP))
			return IPAMAddResult{}, errors.Wrap(errRequestIP, "Failed to get IP address from CNS")
		}

		// Present the single-IP response in the multi-IP shape handled below.
		response = &cns.IPConfigsResponse{
			Response:  res.Response,
			PodIPInfo: []cns.PodIpInfo{res.PodIpInfo},
		}
	}

	// Guard against a client that reports success without a response, which
	// would otherwise panic when the pod IP infos are read.
	if response == nil {
		return IPAMAddResult{}, errors.New("CNS returned no IP configs response")
	}

	addResult := IPAMAddResult{interfaceInfo: make(map[string]network.InterfaceInfo)}
	numInterfacesWithDefaultRoutes := 0

	for i := range response.PodIPInfo {
		podIPInfo := &response.PodIPInfo[i]
		info := IPResultInfo{
			podIPAddress:       podIPInfo.PodIPConfig.IPAddress,
			ncSubnetPrefix:     podIPInfo.NetworkContainerPrimaryIPConfig.IPSubnet.PrefixLength,
			ncPrimaryIP:        podIPInfo.NetworkContainerPrimaryIPConfig.IPSubnet.IPAddress,
			ncGatewayIPAddress: podIPInfo.NetworkContainerPrimaryIPConfig.GatewayIPAddress,
			hostSubnet:         podIPInfo.HostPrimaryIPInfo.Subnet,
			hostPrimaryIP:      podIPInfo.HostPrimaryIPInfo.PrimaryIP,
			hostGateway:        podIPInfo.HostPrimaryIPInfo.Gateway,
			nicType:            podIPInfo.NICType,
			macAddress:         podIPInfo.MacAddress,
			skipDefaultRoutes:  podIPInfo.SkipDefaultRoutes,
			routes:             podIPInfo.Routes,
			pnpID:              podIPInfo.PnPID,
			endpointPolicies:   podIPInfo.EndpointPolicies,
		}

		logger.Info("Received info for pod",
			zap.Any("ipInfo", info),
			zap.Any("podInfo", podInfo))

		// The key is computed from the NIC type exactly as CNS reported it,
		// i.e. before an empty type is normalized to InfraNIC below.
		key := invoker.getInterfaceInfoKey(info.nicType, info.macAddress)

		//nolint:exhaustive // only the NIC types handled by this invoker are listed; the rest hit default
		switch info.nicType {
		case cns.NodeNetworkInterfaceFrontendNIC:
			// only handling single v4 PodIPInfo for NodeNetworkInterfaceFrontendNIC and AccelnetNIC at the moment, will have to update once v6 gets added
			if !info.skipDefaultRoutes {
				numInterfacesWithDefaultRoutes++
			}

			// Add secondary interface info from podIPInfo to ipamAddResult
			if err := configureSecondaryAddResult(&info, &addResult, &podIPInfo.PodIPConfig, key); err != nil {
				return IPAMAddResult{}, err
			}
		case cns.BackendNIC:
			// TODO: check whether setting default route on IB interface
			// handle ipv4 PodIPInfo for BackendNIC
			if err := addBackendNICToResult(&info, &addResult, key); err != nil {
				return IPAMAddResult{}, err
			}
		case cns.InfraNIC, "":
			// if we change from legacy cns, the nicType will be empty, so we assume it is infra nic
			info.nicType = cns.InfraNIC

			// only count dualstack interface once
			if _, exist := addResult.interfaceInfo[key]; !exist {
				addResult.interfaceInfo[key] = network.InterfaceInfo{}
				if !info.skipDefaultRoutes {
					numInterfacesWithDefaultRoutes++
				}
			}

			overlayMode := (invoker.ipamMode == util.V4Overlay) || (invoker.ipamMode == util.DualStackOverlay) || (invoker.ipamMode == util.Overlay)
			if err := configureDefaultAddResult(&info, &addConfig, &addResult, overlayMode, key); err != nil {
				return IPAMAddResult{}, err
			}
		default:
			logger.Warn("Unknown NIC type received from cns pod ip info", zap.String("nicType", string(info.nicType)))
		}
	}

	// Make sure default routes exist for 1 interface
	if numInterfacesWithDefaultRoutes != expectedNumInterfacesWithDefaultRoutes {
		return IPAMAddResult{}, errInvalidDefaultRouting
	}

	return addResult, nil
}
// setHostOptions fills options with the host-side settings for a pod IP in the
// given network container subnet:
//   - network.RoutesKey: one route to ncSubnetPrefix via the host gateway;
//   - network.IPTablesKey: the iptables commands still missing on the host to
//     create the Swift NAT chain, hook it into POSTROUTING, SNAT Azure DNS
//     traffic (UDP and TCP) to the NC primary IP and SNAT IMDS traffic to the
//     host primary IP. Commands are only generated, not executed, here.
//
// It returns an error, before writing anything to options, when the host
// primary IP or host gateway in info cannot be parsed.
func setHostOptions(ncSubnetPrefix *net.IPNet, options map[string]interface{}, info *IPResultInfo) error {
	// Match expression shared by all SNAT rules: source subnet, destination, protocol, port.
	const snatMatchFormat = " -m addrtype ! --dst-type local -s %s -d %s -p %s --dport %d"

	if net.ParseIP(info.hostPrimaryIP) == nil {
		return fmt.Errorf("Host IP address %v from response is invalid", info.hostPrimaryIP)
	}

	gateway := net.ParseIP(info.hostGateway)
	if gateway == nil {
		return fmt.Errorf("Host Gateway %v from response is invalid", info.hostGateway)
	}

	// this route is needed when the vm on subnet A needs to send traffic to a pod in subnet B on a different vm
	options[network.RoutesKey] = []network.RouteInfo{{Dst: *ncSubnetPrefix, Gw: gateway}}

	podSubnet := ncSubnetPrefix.String()
	snatToNCPrimaryIP := fmt.Sprintf("%s --to %s", iptables.Snat, info.ncPrimaryIP)
	// we need to snat IMDS traffic to node IP, this sets up snat '--to'
	snatToHostIP := fmt.Sprintf("%s --to %s", iptables.Snat, info.hostPrimaryIP)

	// Rules are checked and queued in this order: DNS over UDP, DNS over TCP, IMDS.
	snatRules := [...]struct{ match, target string }{
		{fmt.Sprintf(snatMatchFormat, podSubnet, networkutils.AzureDNS, iptables.UDP, iptables.DNSPort), snatToNCPrimaryIP},
		{fmt.Sprintf(snatMatchFormat, podSubnet, networkutils.AzureDNS, iptables.TCP, iptables.DNSPort), snatToNCPrimaryIP},
		{fmt.Sprintf(snatMatchFormat, podSubnet, networkutils.AzureIMDS, iptables.TCP, iptables.HTTPPort), snatToHostIP},
	}

	client := iptables.NewClient()
	var cmds []iptables.IPTableEntry

	if !client.ChainExists(iptables.V4, iptables.Nat, iptables.Swift) {
		cmds = append(cmds, client.GetCreateChainCmd(iptables.V4, iptables.Nat, iptables.Swift))
	}

	if !client.RuleExists(iptables.V4, iptables.Nat, iptables.Postrouting, "", iptables.Swift) {
		cmds = append(cmds, client.GetAppendIptableRuleCmd(iptables.V4, iptables.Nat, iptables.Postrouting, "", iptables.Swift))
	}

	for _, rule := range snatRules {
		if !client.RuleExists(iptables.V4, iptables.Nat, iptables.Swift, rule.match, rule.target) {
			cmds = append(cmds, client.GetInsertIptableRuleCmd(iptables.V4, iptables.Nat, iptables.Swift, rule.match, rule.target))
		}
	}

	options[network.IPTablesKey] = cmds
	return nil
}
// Delete releases the pod's IP configuration through CNS.
//
// It calls the ReleaseIPs API first and, only when CNS reports that API as
// unsupported, falls back to the single-IP ReleaseIPAddress API. When address
// is non-nil its IP is sent as the desired IP address of the ReleaseIPs
// request; the fallback request carries no desired IP address.
//
// If a release call fails with a CNS connection error, the pod interface ID
// and container ID are handed to fsnotify.AddFile under watcherPath and Delete
// succeeds as long as that call succeeds. Any other release error is returned
// wrapped. errEmptyCNIArgs is returned when args is nil. The nwCfg and options
// parameters are not used.
func (invoker *CNSIPAMInvoker) Delete(address *net.IPNet, nwCfg *cni.NetworkConfig, args *cniSkel.CmdArgs, _ map[string]interface{}) error { //nolint
	var connectionErr *cnscli.ConnectionFailureErr

	// Parse Pod arguments.
	podInfo := cns.KubernetesPodInfo{
		PodName:      invoker.podName,
		PodNamespace: invoker.podNamespace,
	}

	orchestratorContext, err := json.Marshal(podInfo)
	if err != nil {
		return errors.Wrap(err, "failed to marshal orchestrator context during delete")
	}

	if args == nil {
		return errEmptyCNIArgs
	}

	ipConfigs := cns.IPConfigsRequest{
		OrchestratorContext: orchestratorContext,
		PodInterfaceID:      GetEndpointID(args),
		InfraContainerID:    args.ContainerID,
	}

	if address != nil {
		ipConfigs.DesiredIPAddresses = append(ipConfigs.DesiredIPAddresses, address.IP.String())
	} else {
		logger.Info("CNS invoker called with empty IP address")
	}

	err = invoker.cnsClient.ReleaseIPs(context.TODO(), ipConfigs)
	if err == nil {
		return nil
	}

	if !cnscli.IsUnsupportedAPI(err) {
		if errors.As(err, &connectionErr) {
			// CNS is unreachable: record the IDs for the watcher instead of failing the delete.
			if addErr := fsnotify.AddFile(ipConfigs.PodInterfaceID, args.ContainerID, watcherPath); addErr != nil {
				logger.Error("Failed to add file to watcher", zap.String("podInterfaceID", ipConfigs.PodInterfaceID), zap.String("containerID", args.ContainerID),
					zap.Error(log.NewErrorWithoutStackTrace(addErr)))
				return errors.Wrapf(addErr, "failed to add file to watcher with containerID %s and podInterfaceID %s", args.ContainerID, ipConfigs.PodInterfaceID)
			}
			return nil
		}

		logger.Error("Failed to release IP address",
			zap.String("infracontainerid", ipConfigs.InfraContainerID),
			zap.Error(err))
		// Wrapf appends the cause itself; a literal "%w" must not appear in the message.
		return errors.Wrapf(err, "failed to release IP %v using ReleaseIPs", ipConfigs.DesiredIPAddresses)
	}

	// If ReleaseIPs is not supported by CNS, use ReleaseIPAddress API
	logger.Error("ReleaseIPs not supported by CNS. Invoking ReleaseIPAddress API",
		zap.Any("ipconfigs", ipConfigs))

	ipConfig := cns.IPConfigRequest{
		OrchestratorContext: orchestratorContext,
		PodInterfaceID:      GetEndpointID(args),
		InfraContainerID:    args.ContainerID,
	}

	err = invoker.cnsClient.ReleaseIPAddress(context.TODO(), ipConfig)
	if err == nil {
		return nil
	}

	if errors.As(err, &connectionErr) {
		// CNS is unreachable: record the IDs for the watcher instead of failing the delete.
		if addErr := fsnotify.AddFile(ipConfigs.PodInterfaceID, args.ContainerID, watcherPath); addErr != nil {
			logger.Error("Failed to add file to watcher (unsupported api path)",
				zap.String("podInterfaceID", ipConfigs.PodInterfaceID), zap.String("containerID", args.ContainerID), zap.Error(log.NewErrorWithoutStackTrace(addErr)))
			return errors.Wrapf(addErr, "failed to add file to watcher with containerID %s and podInterfaceID %s (unsupported api path)", args.ContainerID, ipConfigs.PodInterfaceID)
		}
		return nil
	}

	logger.Error("Failed to release IP address from CNS using ReleaseIPAddress ",
		zap.String("infracontainerid", ipConfigs.InfraContainerID),
		zap.Error(err))
	// The fallback request never sets a desired IP, so report the addresses
	// collected for the original ReleaseIPs request instead of an empty string.
	return errors.Wrapf(err, "failed to release IP %v using ReleaseIPAddress", ipConfigs.DesiredIPAddresses)
}
// getRoutes converts CNS routes into network.RouteInfo values.
//
// Every route destination must be a valid CIDR; otherwise an error naming the
// destination is returned. A gateway that cannot be parsed is rejected with
// errInvalidGatewayIP only when skipDefaultRoutes is true; when it is false
// the route is kept with a nil gateway. The returned slice is non-nil even
// when cnsRoutes is empty.
func getRoutes(cnsRoutes []cns.Route, skipDefaultRoutes bool) ([]network.RouteInfo, error) {
	parsed := make([]network.RouteInfo, 0, len(cnsRoutes))

	for i := range cnsRoutes {
		cnsRoute := &cnsRoutes[i]

		_, dstNet, parseErr := net.ParseCIDR(cnsRoute.IPAddress)
		if parseErr != nil {
			return nil, fmt.Errorf("unable to parse destination %s: %w", cnsRoute.IPAddress, parseErr)
		}

		gateway := net.ParseIP(cnsRoute.GatewayIPAddress)
		if skipDefaultRoutes && gateway == nil {
			return nil, errors.Wrap(errInvalidGatewayIP, cnsRoute.GatewayIPAddress)
		}

		parsed = append(parsed, network.RouteInfo{Dst: *dstNet, Gw: gateway})
	}

	return parsed, nil
}
// configureDefaultAddResult records one infra NIC pod IP info in addResult
// under key.
//
// It appends an IP config (pod IP, NC subnet mask, gateway) and routes to the
// interface info already stored for key, so several infra infos (e.g. dual
// stack) accumulate on the same entry. When CNS supplies routes they are used
// as-is; otherwise a single default route via the gateway is added. For an
// IPv6 pod IP, addResult.ipv6Enabled is set.
//
// Gateway selection: the NC gateway from info is used when it parses. When it
// does not, non-overlay mode fails with errInvalidArgs, while overlay mode
// derives the gateway from the NC subnet (IPv4, via getOverlayGateway) or uses
// overlayGatewayV6IP (IPv6).
//
// Side effects on addConfig.options: network.SNATIPKey is set to the NC
// primary IP when that IP is IPv4, and in non-overlay mode setHostOptions adds
// the host route and iptables entries. addConfig.options must be non-nil.
func configureDefaultAddResult(info *IPResultInfo, addConfig *IPAMAddConfig, addResult *IPAMAddResult, overlayMode bool, key string) error {
	// set the NC Primary IP in options
	// SNATIPKey is not set for ipv6
	if net.ParseIP(info.ncPrimaryIP).To4() != nil {
		addConfig.options[network.SNATIPKey] = info.ncPrimaryIP
	}

	// ParseCIDR yields both the pod IP and the NC subnet it belongs to; a nil
	// error guarantees a non-nil IP, so the IP is not parsed again below.
	podIP, ncIPNet, err := net.ParseCIDR(info.podIPAddress + "/" + fmt.Sprint(info.ncSubnetPrefix))
	if err != nil {
		return errors.Wrap(err, "Unable to parse IP from response: "+info.podIPAddress)
	}

	ncgw := net.ParseIP(info.ncGatewayIPAddress)
	if ncgw == nil {
		// TODO: Remove v4overlay and dualstackoverlay options, after 'overlay' rolls out in AKS-RP
		if !overlayMode {
			return errors.Wrap(errInvalidArgs, "Gateway address "+info.ncGatewayIPAddress+" from response is invalid")
		}

		switch {
		case podIP.To4() != nil:
			ncgw, err = getOverlayGateway(ncIPNet)
			if err != nil {
				return err
			}
		case podIP.To16() != nil:
			ncgw = net.ParseIP(overlayGatewayV6IP)
		default:
			// Defensive only: a parsed IP is always v4 or v6. Wrap a real error
			// here, since wrapping a nil error would report success.
			return errors.Wrap(errInvalidArgs, "No podIPAddress is found")
		}
	}

	// get the name of the primary IP address
	_, hostIPNet, err := net.ParseCIDR(info.hostSubnet)
	if err != nil {
		return errors.Wrap(err, "unable to parse hostSubnet")
	}

	defaultRouteDstPrefix := network.Ipv4DefaultRouteDstPrefix
	if podIP.To4() == nil {
		defaultRouteDstPrefix = network.Ipv6DefaultRouteDstPrefix
		addResult.ipv6Enabled = true
	}

	existing := addResult.interfaceInfo[key]

	ipConfigs := existing.IPConfigs
	ipConfigs = append(ipConfigs,
		&network.IPConfig{
			Address: net.IPNet{
				IP:   podIP,
				Mask: ncIPNet.Mask,
			},
			Gateway: ncgw,
		})

	routes, getRoutesErr := getRoutes(info.routes, info.skipDefaultRoutes)
	if getRoutesErr != nil {
		return getRoutesErr
	}

	resRoute := existing.Routes
	if len(routes) > 0 {
		resRoute = append(resRoute, routes...)
	} else { // add default routes if none are provided
		resRoute = append(resRoute, network.RouteInfo{
			Dst: defaultRouteDstPrefix,
			Gw:  ncgw,
		})
	}

	// if we have multiple infra ip result infos, we effectively append routes and ip configs to that same interface info each time
	// the host subnet prefix (in ipv4 or ipv6) will always refer to the same interface regardless of which ip result info we look at
	addResult.interfaceInfo[key] = network.InterfaceInfo{
		NICType:           cns.InfraNIC,
		SkipDefaultRoutes: info.skipDefaultRoutes,
		IPConfigs:         ipConfigs,
		Routes:            resRoute,
		HostSubnetPrefix:  *hostIPNet,
		EndpointPolicies:  info.endpointPolicies,
	}

	// set subnet prefix for host vm
	// setHostOptions will execute if IPAM mode is not v4 overlay and not dualStackOverlay mode
	// TODO: Remove v4overlay and dualstackoverlay options, after 'overlay' rolls out in AKS-RP
	if !overlayMode {
		if err := setHostOptions(ncIPNet, addConfig.options, info); err != nil {
			return err
		}
	}

	return nil
}
// configureSecondaryAddResult records a secondary (frontend NIC) interface in
// addResult under key.
//
// The stored interface info carries a single IP config built from podIPConfig
// (with the NC gateway from info, which is nil if it does not parse), the
// routes converted by getRoutes, the NIC type, the parsed MAC address and the
// skipDefaultRoutes flag. Any existing entry for key is replaced.
//
// It returns an error, leaving addResult untouched, when the pod IP, the MAC
// address or a route cannot be parsed.
func configureSecondaryAddResult(info *IPResultInfo, addResult *IPAMAddResult, podIPConfig *cns.IPSubnet, key string) error {
	ip, ipnet, err := podIPConfig.GetIPNet()
	if err != nil {
		return errors.Wrap(err, "Unable to parse IP from response: "+info.podIPAddress)
	}
	if ip == nil {
		// No IP and no error: wrap a real error, since wrapping a nil error
		// would make this function report success without recording anything.
		return errors.Wrap(errInvalidArgs, "Unable to parse IP from response: "+info.podIPAddress)
	}

	macAddress, err := net.ParseMAC(info.macAddress)
	if err != nil {
		logger.Error("Invalid mac address", zap.Error(err))
		return errors.Wrap(err, "Invalid mac address")
	}

	routes, err := getRoutes(info.routes, info.skipDefaultRoutes)
	if err != nil {
		return err
	}

	addResult.interfaceInfo[key] = network.InterfaceInfo{
		IPConfigs: []*network.IPConfig{
			{
				Address: net.IPNet{
					IP:   ip,
					Mask: ipnet.Mask,
				},
				Gateway: net.ParseIP(info.ncGatewayIPAddress),
			},
		},
		Routes:            routes,
		NICType:           info.nicType,
		MacAddress:        macAddress,
		SkipDefaultRoutes: info.skipDefaultRoutes,
	}

	return nil
}
// addBackendNICToResult records a backend NIC in addResult under key, storing
// its NIC type, parsed MAC address, skipDefaultRoutes flag and PnP ID. Any
// existing entry for key is replaced.
//
// It returns an error, leaving addResult untouched, when the MAC address in
// info cannot be parsed or when the PnP ID is empty.
func addBackendNICToResult(info *IPResultInfo, addResult *IPAMAddResult, key string) error {
	macAddress, err := net.ParseMAC(info.macAddress)
	if err != nil {
		logger.Error("Invalid mac address", zap.Error(err))
		return errors.Wrap(err, "Invalid mac address")
	}

	// return error if pnp id is missing in cns goalstate
	if info.pnpID == "" {
		logger.Error("pnp id is not received from cns")
		// err is nil at this point and wrapping a nil error yields nil, which
		// would silently accept the missing PnP ID; create a new error instead.
		return errors.New("pnp id is not received from cns")
	}

	addResult.interfaceInfo[key] = network.InterfaceInfo{
		NICType:           info.nicType,
		MacAddress:        macAddress,
		SkipDefaultRoutes: info.skipDefaultRoutes,
		PnPID:             info.pnpID,
	}

	return nil
}
// getInterfaceInfoKey returns the map key under which an interface is stored
// in an add result: the MAC address for frontend and backend NICs, and the NIC
// type string for every other NIC type (including the empty type).
func (invoker *CNSIPAMInvoker) getInterfaceInfoKey(nicType cns.NICType, macAddress string) string {
	keyedByMAC := nicType == cns.NodeNetworkInterfaceFrontendNIC || nicType == cns.BackendNIC
	if !keyedByMAC {
		return string(nicType)
	}
	return macAddress
}