cmd/upgrade.go (361 lines of code) (raw):

// Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT license. package cmd import ( "context" "fmt" "os" "path/filepath" "strings" "time" "github.com/Azure/aks-engine-azurestack/pkg/api" "github.com/Azure/aks-engine-azurestack/pkg/api/common" "github.com/Azure/aks-engine-azurestack/pkg/armhelpers" "github.com/Azure/aks-engine-azurestack/pkg/engine" "github.com/Azure/aks-engine-azurestack/pkg/helpers" "github.com/Azure/aks-engine-azurestack/pkg/helpers/to" "github.com/Azure/aks-engine-azurestack/pkg/i18n" "github.com/Azure/aks-engine-azurestack/pkg/operations/kubernetesupgrade" "github.com/blang/semver" "github.com/leonelquinteros/gotext" "github.com/pkg/errors" log "github.com/sirupsen/logrus" "github.com/spf13/cobra" ) const ( upgradeName = "upgrade" upgradeShortDescription = "Upgrade an existing AKS Engine-created Kubernetes cluster" upgradeLongDescription = "Upgrade an existing AKS Engine-created Kubernetes cluster, one node at a time" smalldiskWindowsImageIdentifier = "smalldisk" ctrdWindowsImageIdentifier = "ctrd" ) type upgradeCmd struct { authProvider // user input resourceGroupName string apiModelPath string deploymentDirectory string currentVersion string upgradeVersion string location string kubeconfigPath string timeoutInMinutes int cordonDrainTimeoutInMinutes int force bool controlPlaneOnly bool disableClusterInitComponentDuringUpgrade bool upgradeWindowsVHD bool // derived containerService *api.ContainerService apiVersion string client armhelpers.AKSEngineClient locale *gotext.Locale nameSuffix string agentPoolsToUpgrade map[string]bool timeout *time.Duration cordonDrainTimeout *time.Duration } func newUpgradeCmd() *cobra.Command { uc := upgradeCmd{ authProvider: &authArgs{}, } upgradeCmd := &cobra.Command{ Use: upgradeName, Short: upgradeShortDescription, Long: upgradeLongDescription, RunE: uc.run, } f := upgradeCmd.Flags() f.StringVarP(&uc.location, "location", "l", "", "location the cluster is deployed in (required)") f.StringVarP(&uc.resourceGroupName, "resource-group", "g", "", "the resource group where the cluster is deployed (required)") f.StringVarP(&uc.apiModelPath, "api-model", "m", "", "path to the generated apimodel.json file") f.StringVar(&uc.deploymentDirectory, "deployment-dir", "", "the location of the output from `generate`") f.StringVarP(&uc.upgradeVersion, "upgrade-version", "k", "", "desired kubernetes version (required)") f.StringVarP(&uc.kubeconfigPath, "kubeconfig", "b", "", "the path of the kubeconfig file") f.IntVar(&uc.timeoutInMinutes, "vm-timeout", -1, "how long to wait for each vm to be upgraded in minutes") f.IntVar(&uc.cordonDrainTimeoutInMinutes, "cordon-drain-timeout", -1, "how long to wait for each vm to be cordoned in minutes") f.BoolVarP(&uc.force, "force", "f", false, "force upgrading the cluster to desired version. Allows same version upgrades and downgrades.") f.BoolVarP(&uc.controlPlaneOnly, "control-plane-only", "", false, "upgrade control plane VMs only, do not upgrade node pools") f.BoolVarP(&uc.upgradeWindowsVHD, "upgrade-windows-vhd", "", true, "upgrade image reference of the Windows nodes") addAuthFlags(uc.getAuthArgs(), f) _ = f.MarkDeprecated("deployment-dir", "deployment-dir is no longer required for scale or upgrade. Please use --api-model.") return upgradeCmd } func (uc *upgradeCmd) validate(cmd *cobra.Command) error { var err error uc.locale, err = i18n.LoadTranslations() if err != nil { return errors.Wrap(err, "error loading translation files") } if uc.resourceGroupName == "" { _ = cmd.Usage() return errors.New("--resource-group must be specified") } if uc.location == "" { _ = cmd.Usage() return errors.New("--location must be specified") } uc.location = helpers.NormalizeAzureRegion(uc.location) if uc.timeoutInMinutes != -1 { timeout := time.Duration(uc.timeoutInMinutes) * time.Minute uc.timeout = &timeout } if uc.cordonDrainTimeoutInMinutes != -1 { cordonDrainTimeout := time.Duration(uc.cordonDrainTimeoutInMinutes) * time.Minute uc.cordonDrainTimeout = &cordonDrainTimeout } if uc.upgradeVersion == "" { _ = cmd.Usage() return errors.New("--upgrade-version must be specified") } if uc.apiModelPath == "" && uc.deploymentDirectory == "" { _ = cmd.Usage() return errors.New("--api-model must be specified") } if uc.apiModelPath != "" && uc.deploymentDirectory != "" { _ = cmd.Usage() return errors.New("ambiguous, please specify only one of --api-model and --deployment-dir") } return nil } func (uc *upgradeCmd) loadCluster() error { var err error ctx, cancel := context.WithTimeout(context.Background(), armhelpers.DefaultARMOperationTimeout) defer cancel() // Load apimodel from the directory. if uc.apiModelPath == "" { uc.apiModelPath = filepath.Join(uc.deploymentDirectory, apiModelFilename) } if _, err = os.Stat(uc.apiModelPath); os.IsNotExist(err) { return errors.Errorf("specified api model does not exist (%s)", uc.apiModelPath) } apiloader := &api.Apiloader{ Translator: &i18n.Translator{ Locale: uc.locale, }, } // Load the container service. uc.containerService, uc.apiVersion, err = apiloader.LoadContainerServiceFromFile(uc.apiModelPath, true, true, nil) if err != nil { return errors.Wrap(err, "error parsing the api model") } // Ensure there aren't known-breaking API model configurations if uc.containerService.Properties.MasterProfile.AvailabilityProfile == api.VirtualMachineScaleSets { return errors.Errorf("clusters with a VMSS control plane are not upgradable using `aks-engine upgrade`") } if uc.containerService.Properties.OrchestratorProfile != nil && uc.containerService.Properties.OrchestratorProfile.KubernetesConfig != nil && to.Bool(uc.containerService.Properties.OrchestratorProfile.KubernetesConfig.EnableEncryptionWithExternalKms) && to.Bool(uc.containerService.Properties.OrchestratorProfile.KubernetesConfig.UseManagedIdentity) && uc.containerService.Properties.OrchestratorProfile.KubernetesConfig.UserAssignedID == "" { return errors.Errorf("clusters with enableEncryptionWithExternalKms=true and system-assigned identity are not upgradable using `aks-engine upgrade`") } // Set 60 minutes cordonDrainTimeout for Azure Stack Cloud to give it enough time to move around resources during Node Drain, // especially disk detach/attach operations. We still honor the user's input. if uc.cordonDrainTimeout == nil && uc.containerService.Properties.IsAzureStackCloud() { cordonDrainTimeout := time.Duration(60) * time.Minute uc.cordonDrainTimeout = &cordonDrainTimeout } // Use the Windows VHD associated with the aks-engine version if upgradeWindowsVHD is set to "true" if uc.upgradeWindowsVHD && uc.containerService.Properties.WindowsProfile != nil { windowsProfile := uc.containerService.Properties.WindowsProfile if api.ImagePublisherAndOfferMatch(windowsProfile, api.AKSWindowsServer2019ContainerDOSImageConfig) && strings.Contains(windowsProfile.WindowsSku, ctrdWindowsImageIdentifier) { windowsProfile.ImageVersion = api.AKSWindowsServer2019ContainerDOSImageConfig.ImageVersion windowsProfile.WindowsSku = api.AKSWindowsServer2019ContainerDOSImageConfig.ImageSku } else if api.ImagePublisherAndOfferMatch(windowsProfile, api.AKSWindowsServer2019OSImageConfig) && strings.Contains(windowsProfile.WindowsSku, smalldiskWindowsImageIdentifier) { windowsProfile.ImageVersion = api.AKSWindowsServer2019OSImageConfig.ImageVersion windowsProfile.WindowsSku = api.AKSWindowsServer2019OSImageConfig.ImageSku } else if api.ImagePublisherAndOfferMatch(windowsProfile, api.WindowsServer2019OSImageConfig) { windowsProfile.ImageVersion = api.WindowsServer2019OSImageConfig.ImageVersion windowsProfile.WindowsSku = api.WindowsServer2019OSImageConfig.ImageSku } } // Update the masterProfile and agentPoolProfiles distro for AzureStackCloud to use aks-ubuntu-18.04 instead of aks-ubuntu-16.04 if uc.containerService.Properties.IsAzureStackCloud() { if uc.containerService.Properties.MasterProfile.Distro == api.AKSUbuntu1604 { log.Infoln("Distro 'aks-ubuntu-16.04' is not longer supported on Azure Stack Hub, overwriting master profile distro to 'aks-ubuntu-22.04'") uc.containerService.Properties.MasterProfile.Distro = api.AKSUbuntu2204 } else if uc.containerService.Properties.MasterProfile.Distro == api.AKSUbuntu1804 { log.Infoln("Distro 'aks-ubuntu-18.04' is not longer supported on Azure Stack Hub, overwriting master profile distro to 'aks-ubuntu-22.04'") uc.containerService.Properties.MasterProfile.Distro = api.AKSUbuntu2204 } else if uc.containerService.Properties.MasterProfile.Distro == api.AKSUbuntu2004 { log.Infoln("Distro 'aks-ubuntu-20.04' is not longer supported on Azure Stack Hub, overwriting master profile distro to 'aks-ubuntu-22.04'") uc.containerService.Properties.MasterProfile.Distro = api.AKSUbuntu2204 } for _, app := range uc.containerService.Properties.AgentPoolProfiles { if app.Distro == api.AKSUbuntu1604 { log.Infoln(fmt.Sprintf("Distro 'aks-ubuntu-16.04' is not longer supported on Azure Stack Hub, overwriting agent pool profile %s distro to 'aks-ubuntu-22.04'", app.Name)) app.Distro = api.AKSUbuntu2204 } else if app.Distro == api.AKSUbuntu1804 { log.Infoln(fmt.Sprintf("Distro 'aks-ubuntu-18.04' is not longer supported on Azure Stack Hub, overwriting agent pool profile %s distro to 'aks-ubuntu-22.04'", app.Name)) app.Distro = api.AKSUbuntu2204 } else if app.Distro == api.AKSUbuntu2004 { log.Infoln(fmt.Sprintf("Distro 'aks-ubuntu-20.04' is not longer supported on Azure Stack Hub, overwriting agent pool profile %s distro to 'aks-ubuntu-22.04'", app.Name)) app.Distro = api.AKSUbuntu2204 } } } // Enforce UseCloudControllerManager for Kubernetes 1.21+ on Azure Stack cloud if uc.containerService.Properties.IsAzureStackCloud() && common.IsKubernetesVersionGe(uc.upgradeVersion, "1.21.0") { log.Infoln("The in-tree cloud provider is not longer supported on Azure Stack Hub for v1.21+ clusters, overwriting UseCloudControllerManager to 'true'") uc.containerService.Properties.OrchestratorProfile.KubernetesConfig.UseCloudControllerManager = to.BoolPtr(true) } // Only containerd runtime is allowed for Kubernetes 1.24+ on Azure Stack cloud if uc.containerService.Properties.IsAzureStackCloud() && strings.EqualFold(uc.containerService.Properties.OrchestratorProfile.KubernetesConfig.ContainerRuntime, "docker") && common.IsKubernetesVersionGe(uc.upgradeVersion, "1.24.0") { log.Infoln("The docker runtime is no longer supported for v1.24+ clusters, overwriting ContainerRuntime to 'containerd'") uc.containerService.Properties.OrchestratorProfile.KubernetesConfig.ContainerRuntime = "containerd" } // The cluster-init component is a cluster create-only feature, temporarily disable if enabled if i := api.GetComponentsIndexByName(uc.containerService.Properties.OrchestratorProfile.KubernetesConfig.Components, common.ClusterInitComponentName); i > -1 { if uc.containerService.Properties.OrchestratorProfile.KubernetesConfig.Components[i].IsEnabled() { uc.disableClusterInitComponentDuringUpgrade = true uc.containerService.Properties.OrchestratorProfile.KubernetesConfig.Components[i].Enabled = to.BoolPtr(false) } } if uc.containerService.Properties.IsCustomCloudProfile() { if err = writeCustomCloudProfile(uc.containerService); err != nil { return errors.Wrap(err, "error writing custom cloud profile") } if err = uc.containerService.Properties.SetCustomCloudSpec(api.AzureCustomCloudSpecParams{ IsUpgrade: true, IsScale: false, }); err != nil { return errors.Wrap(err, "error parsing the api model") } } if err = uc.getAuthArgs().validateAuthArgs(); err != nil { return err } // Set env var if custom cloud profile is not nil var env *api.Environment if uc.containerService != nil && uc.containerService.Properties != nil && uc.containerService.Properties.CustomCloudProfile != nil { env = uc.containerService.Properties.CustomCloudProfile.Environment } if uc.client, err = uc.getAuthArgs().getClient(env); err != nil { return errors.Wrap(err, "failed to get client") } _, err = uc.client.EnsureResourceGroup(ctx, uc.resourceGroupName, uc.location, nil) if err != nil { return errors.Wrap(err, "error ensuring resource group") } err = uc.initialize() if err != nil { return errors.Wrap(err, "error validating the api model") } return nil } func (uc *upgradeCmd) validateTargetVersion() error { // Get available upgrades for container service. orchestratorInfo, err := api.GetOrchestratorVersionProfile(uc.containerService.Properties.OrchestratorProfile, uc.containerService.Properties.HasWindows(), uc.containerService.Properties.IsAzureStackCloud()) if err != nil { return errors.Wrap(err, "error getting list of available upgrades") } found := false for _, up := range orchestratorInfo.Upgrades { if up.OrchestratorVersion == uc.upgradeVersion { found = true break } } if !found { return errors.Errorf("upgrading from Kubernetes version %s to version %s is not supported. To see a list of available upgrades, use 'aks-engine-azurestack get-versions --version %s'", uc.containerService.Properties.OrchestratorProfile.OrchestratorVersion, uc.upgradeVersion, uc.containerService.Properties.OrchestratorProfile.OrchestratorVersion) } return nil } func (uc *upgradeCmd) initialize() error { if uc.containerService.Location == "" { uc.containerService.Location = uc.location } else if uc.containerService.Location != uc.location { return errors.New("--location does not match api model location") } // Validate semver compatibility _, err := semver.Make(uc.upgradeVersion) if err != nil { return errors.Wrap(err, fmt.Sprintf("Invalid --upgrade-version value '%s', not a semver string", uc.upgradeVersion)) } if !uc.force { err := uc.validateTargetVersion() if err != nil { return errors.Wrap(err, "Invalid upgrade target version. Consider using --force if you really want to proceed") } } uc.currentVersion = uc.containerService.Properties.OrchestratorProfile.OrchestratorVersion uc.containerService.Properties.OrchestratorProfile.OrchestratorVersion = uc.upgradeVersion //allows to identify VMs in the resource group that belong to this cluster. uc.nameSuffix = uc.containerService.Properties.GetClusterID() log.Infoln(fmt.Sprintf("Upgrading cluster with name suffix: %s", uc.nameSuffix)) uc.agentPoolsToUpgrade = make(map[string]bool) uc.agentPoolsToUpgrade[kubernetesupgrade.MasterPoolName] = true for _, agentPool := range uc.containerService.Properties.AgentPoolProfiles { uc.agentPoolsToUpgrade[agentPool.Name] = true } return nil } func (uc *upgradeCmd) run(cmd *cobra.Command, args []string) error { err := uc.validate(cmd) if err != nil { return errors.Wrap(err, "validating upgrade command") } err = uc.loadCluster() if err != nil { return errors.Wrap(err, "loading existing cluster") } if uc.containerService.Properties.IsAzureStackCloud() { if err = uc.validateOSBaseImage(); err != nil { return errors.Wrapf(err, "validating OS base images required by %s", uc.apiModelPath) } } upgradeCluster := kubernetesupgrade.UpgradeCluster{ Translator: &i18n.Translator{ Locale: uc.locale, }, Logger: log.NewEntry(log.New()), Client: uc.client, StepTimeout: uc.timeout, CordonDrainTimeout: uc.cordonDrainTimeout, } upgradeCluster.ClusterTopology = kubernetesupgrade.ClusterTopology{} upgradeCluster.SubscriptionID = uc.getAuthArgs().SubscriptionID.String() upgradeCluster.ResourceGroup = uc.resourceGroupName upgradeCluster.DataModel = uc.containerService upgradeCluster.NameSuffix = uc.nameSuffix upgradeCluster.AgentPoolsToUpgrade = uc.agentPoolsToUpgrade upgradeCluster.Force = uc.force upgradeCluster.ControlPlaneOnly = uc.controlPlaneOnly var kubeConfig string if uc.kubeconfigPath != "" { var path string var content []byte path, err = filepath.Abs(uc.kubeconfigPath) if err != nil { return errors.Wrap(err, "reading --kubeconfig") } content, err = os.ReadFile(path) if err != nil { return errors.Wrap(err, "reading --kubeconfig") } kubeConfig = string(content) } else { kubeConfig, err = engine.GenerateKubeConfig(uc.containerService.Properties, uc.location) if err != nil { return errors.Wrap(err, "generating kubeconfig") } } upgradeCluster.CurrentVersion = uc.currentVersion if err = upgradeCluster.UpgradeCluster(uc.client, kubeConfig, BuildTag); err != nil { return errors.Wrap(err, "upgrading cluster") } // Save the new apimodel to reflect the cluster's state. // Restore the original cluster-init component enabled value, if it was disabled during upgrade if uc.disableClusterInitComponentDuringUpgrade { if i := api.GetComponentsIndexByName(uc.containerService.Properties.OrchestratorProfile.KubernetesConfig.Components, common.ClusterInitComponentName); i > -1 { uc.containerService.Properties.OrchestratorProfile.KubernetesConfig.Components[i].Enabled = to.BoolPtr(true) } } apiloader := &api.Apiloader{ Translator: &i18n.Translator{ Locale: uc.locale, }, } b, err := apiloader.SerializeContainerService(uc.containerService, uc.apiVersion) if err != nil { return err } f := helpers.FileSaver{ Translator: &i18n.Translator{ Locale: uc.locale, }, } dir, file := filepath.Split(uc.apiModelPath) return f.SaveFile(dir, file, b) } // validateOSBaseImage checks if the OS image is available on the target cloud (ATM, Azure Stack only) func (uc *upgradeCmd) validateOSBaseImage() error { ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) defer cancel() if err := armhelpers.ValidateRequiredImages(ctx, uc.location, uc.containerService.Properties, uc.client); err != nil { return errors.Wrap(err, "OS base image not available in target cloud") } return nil }