pkg/providers/tinkerbell/upgrade.go:

package tinkerbell

import (
	"context"
	"errors"
	"fmt"
	"reflect"

	tinkv1alpha1 "github.com/tinkerbell/tink/pkg/apis/core/v1alpha1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	kerrors "k8s.io/apimachinery/pkg/util/errors"

	"github.com/aws/eks-anywhere/pkg/api/v1alpha1"
	rufiov1 "github.com/aws/eks-anywhere/pkg/api/v1alpha1/thirdparty/tinkerbell/rufio"
	"github.com/aws/eks-anywhere/pkg/cluster"
	"github.com/aws/eks-anywhere/pkg/collection"
	"github.com/aws/eks-anywhere/pkg/constants"
	"github.com/aws/eks-anywhere/pkg/executables"
	"github.com/aws/eks-anywhere/pkg/providers/tinkerbell/hardware"
	"github.com/aws/eks-anywhere/pkg/providers/tinkerbell/rufiounreleased"
	"github.com/aws/eks-anywhere/pkg/providers/tinkerbell/stack"
	"github.com/aws/eks-anywhere/pkg/types"
	"github.com/aws/eks-anywhere/pkg/utils/yaml"
)

func needsNewControlPlaneTemplate(oldSpec, newSpec *cluster.Spec) bool {
	// Another option is to generate MachineTemplates based on the old and new eksa spec,
	// remove the name field and compare them with DeepEqual.
	// We plan to approach it this way since it's more flexible to add/remove fields and test out for validation.
	if oldSpec.Cluster.Spec.KubernetesVersion != newSpec.Cluster.Spec.KubernetesVersion {
		return true
	}

	if oldSpec.Bundles.Spec.Number != newSpec.Bundles.Spec.Number {
		return true
	}

	return false
}

func needsNewWorkloadTemplate(oldSpec, newSpec *cluster.Spec, oldWorker, newWorker v1alpha1.WorkerNodeGroupConfiguration) bool {
	if oldSpec.Bundles.Spec.Number != newSpec.Bundles.Spec.Number {
		return true
	}

	if !v1alpha1.TaintsSliceEqual(oldWorker.Taints, newWorker.Taints) ||
		!v1alpha1.MapEqual(oldWorker.Labels, newWorker.Labels) ||
		!v1alpha1.WorkerNodeGroupConfigurationKubeVersionUnchanged(&oldWorker, &newWorker, oldSpec.Cluster, newSpec.Cluster) {
		return true
	}

	return false
}

func needsNewKubeadmConfigTemplate(newWorkerNodeGroup, oldWorkerNodeGroup *v1alpha1.WorkerNodeGroupConfiguration) bool {
	return !v1alpha1.TaintsSliceEqual(newWorkerNodeGroup.Taints, oldWorkerNodeGroup.Taints) ||
		!v1alpha1.MapEqual(newWorkerNodeGroup.Labels, oldWorkerNodeGroup.Labels)
}

// SetupAndValidateUpgradeCluster performs the necessary setup and validations for an upgrade operation.
func (p *Provider) SetupAndValidateUpgradeCluster(ctx context.Context, cluster *types.Cluster, clusterSpec *cluster.Spec, currentClusterSpec *cluster.Spec) error {
	if clusterSpec.Cluster.Spec.ExternalEtcdConfiguration != nil {
		return errExternalEtcdUnsupported
	}

	if err := p.configureSshKeys(); err != nil {
		return err
	}

	// If we've been given a CSV with additional hardware for the cluster, validate it and
	// write it to the catalogue so it can be used for further processing.
	if p.hardwareCSVIsProvided() {
		machineCatalogueWriter := hardware.NewMachineCatalogueWriter(p.catalogue)

		machines, err := hardware.NewNormalizedCSVReaderFromFile(p.hardwareCSVFile, p.BMCOptions)
		if err != nil {
			return err
		}

		machineValidator := hardware.NewDefaultMachineValidator()

		if err := hardware.TranslateAll(machines, machineCatalogueWriter, machineValidator); err != nil {
			return err
		}
	}

	// Retrieve all unprovisioned hardware from the existing cluster and populate the catalogue so
	// it can be considered for the upgrade.
	hardware, err := p.providerKubectlClient.GetUnprovisionedTinkerbellHardware(
		ctx,
		cluster.KubeconfigFile,
		constants.EksaSystemNamespace,
	)
	if err != nil {
		return fmt.Errorf("retrieving unprovisioned hardware: %v", err)
	}
	for i := range hardware {
		if err := p.catalogue.InsertHardware(&hardware[i]); err != nil {
			return err
		}
	}

	// Retrieve all provisioned hardware from the existing cluster so machines that are already
	// in use can be filtered out of the catalogue during the upgrade.
	hardware, err = p.providerKubectlClient.GetProvisionedTinkerbellHardware(
		ctx,
		cluster.KubeconfigFile,
		constants.EksaSystemNamespace,
	)
	if err != nil {
		return fmt.Errorf("retrieving provisioned hardware: %v", err)
	}

	// Remove all the provisioned hardware from the catalogue in case it was repeated in the
	// hardware CSV input.
	if err := p.catalogue.RemoveHardwares(hardware); err != nil {
		return err
	}

	upgradeStrategy := clusterSpec.Cluster.Spec.ControlPlaneConfiguration.UpgradeRolloutStrategy

	// Skip extra hardware validation for InPlace upgrades.
	if upgradeStrategy == nil || upgradeStrategy.Type != v1alpha1.InPlaceStrategyType {
		if err := p.validateAvailableHardwareForUpgrade(ctx, currentClusterSpec, clusterSpec); err != nil {
			return err
		}
	}

	if p.clusterConfig.IsManaged() {
		// Update the stack Helm chart's NO_PROXY environment variable to include the management
		// cluster's control plane endpoint IP when upgrading a workload cluster.
		if clusterSpec.Cluster.Spec.ProxyConfiguration != nil {
			managementCluster, err := p.providerKubectlClient.GetEksaCluster(ctx, clusterSpec.ManagementCluster, clusterSpec.Cluster.Spec.ManagementCluster.Name)
			if err != nil {
				return err
			}
			p.stackInstaller.AddNoProxyIP(managementCluster.Spec.ControlPlaneConfiguration.Endpoint.Host)
		}
	}

	if err := p.applyHardwareUpgrade(ctx, cluster); err != nil {
		return err
	}

	// Check if the hardware in the catalogue has a BMCRef. Since we only allow either all
	// hardware with BMC or no hardware with BMC, it's sufficient to check the first entry.
	if p.catalogue.TotalHardware() > 0 && p.catalogue.AllHardware()[0].Spec.BMCRef != nil {
		// Wait to ensure all the new and existing baseboard management connections are valid.
		err = p.providerKubectlClient.WaitForRufioMachines(ctx, cluster, "5m", "Contactable", constants.EksaSystemNamespace)
		if err != nil {
			return fmt.Errorf("waiting for baseboard management to be contactable: %v", err)
		}
	}

	return nil
}

// SetupAndValidateUpgradeManagementComponents performs necessary setup for upgrade management components operation.
func (p *Provider) SetupAndValidateUpgradeManagementComponents(_ context.Context, _ *cluster.Spec) error {
	return nil
}

// validateAvailableHardwareForUpgrade adds the necessary hardware assertions for an upgrade.
// These include both rolling upgrades across control plane and worker nodes and modular
// upgrades pertaining to only control plane or worker components.
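// Scaling node counts up or down is not supported in combination with a rolling upgrade or an
// eks-a version upgrade; the AssertionsForScaleUpDown registration below enforces this.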
func (p *Provider) validateAvailableHardwareForUpgrade(ctx context.Context, currentSpec, newClusterSpec *cluster.Spec) (err error) {
	clusterSpecValidator := NewClusterSpecValidator(
		HardwareSatisfiesOnlyOneSelectorAssertion(p.catalogue),
	)

	eksaVersionUpgrade := currentSpec.Bundles.Spec.Number != newClusterSpec.Bundles.Spec.Number

	currentTinkerbellSpec := NewClusterSpec(currentSpec, currentSpec.TinkerbellMachineConfigs, currentSpec.TinkerbellDatacenter)

	rollingUpgrade := p.isRollingUpgrade(currentSpec, newClusterSpec)

	currentCluster := &ValidatableTinkerbellClusterSpec{currentTinkerbellSpec}

	if rollingUpgrade || eksaVersionUpgrade {
		clusterSpecValidator.Register(ExtraHardwareAvailableAssertionForRollingUpgrade(p.catalogue, currentCluster, eksaVersionUpgrade))
	}

	requirements, err := p.validateHardwareReqForControlPlaneRollOut(currentSpec, newClusterSpec)
	if err != nil {
		return err
	}

	workerRequirements, err := p.validateHardwareReqForWorkerNodeGroupsRollOut(currentSpec, newClusterSpec)
	if err != nil {
		return err
	}

	// Hardware selectors for control plane and worker nodes are mutually exclusive, so it's safe
	// to copy as no keys are going to be overwritten.
	for k, v := range workerRequirements {
		requirements[k] = v
	}

	clusterSpecValidator.Register(ExtraHardwareAvailableAssertionForNodeRollOut(p.catalogue, requirements))

	// ScaleUpDown should not be supported in case of either a rolling upgrade or an eks-a version upgrade.
	clusterSpecValidator.Register(AssertionsForScaleUpDown(p.catalogue, currentCluster, rollingUpgrade || eksaVersionUpgrade))

	tinkerbellClusterSpec := NewClusterSpec(newClusterSpec, p.machineConfigs, p.datacenterConfig)

	if err := clusterSpecValidator.Validate(tinkerbellClusterSpec); err != nil {
		return err
	}

	return nil
}

// validateHardwareReqForControlPlaneRollOut checks if new control plane nodes need to be rolled
// out and returns the hardware requirements for the control plane HardwareSelector.
// As we open up more feature gates in our spec, this function can be used to validate against
// features that affect the control plane only, e.g. API server extra args.
func (p *Provider) validateHardwareReqForControlPlaneRollOut(currentSpec, newClusterSpec *cluster.Spec) (MinimumHardwareRequirements, error) {
	oldCP := currentSpec.TinkerbellMachineConfigs[currentSpec.Cluster.Spec.ControlPlaneConfiguration.MachineGroupRef.Name]
	newCP := newClusterSpec.TinkerbellMachineConfigs[currentSpec.Cluster.Spec.ControlPlaneConfiguration.MachineGroupRef.Name]
	maxSurge := 1
	requirements := MinimumHardwareRequirements{}

	rolloutStrategy := newClusterSpec.Cluster.Spec.ControlPlaneConfiguration.UpgradeRolloutStrategy
	if rolloutStrategy != nil && rolloutStrategy.Type == "RollingUpdate" {
		maxSurge = newClusterSpec.Cluster.Spec.ControlPlaneConfiguration.UpgradeRolloutStrategy.RollingUpdate.MaxSurge
	}

	if oldCP.Spec.OSImageURL != newCP.Spec.OSImageURL {
		if err := requirements.Add(newCP.Spec.HardwareSelector, maxSurge); err != nil {
			return nil, fmt.Errorf("validating hardware requirements for control-plane nodes roll out: %v", err)
		}
	}

	return requirements, nil
}

// validateHardwareReqForWorkerNodeGroupsRollOut checks if new worker nodes need to be rolled out
// for a given worker node group configuration and returns the hardware requirements for the
// worker node group HardwareSelector.
// As we open up more feature gates in our spec, this function can be used to validate against
// features that affect worker node groups only.
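// A roll out is only required when the group's machine config OSImageURL changes; the added
// requirement equals the group's rolling update maxSurge (defaulting to 1).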
func (p *Provider) validateHardwareReqForWorkerNodeGroupsRollOut(currentSpec, newClusterSpec *cluster.Spec) (MinimumHardwareRequirements, error) {
	requirements := MinimumHardwareRequirements{}

	for _, newWngConfig := range newClusterSpec.Cluster.Spec.WorkerNodeGroupConfigurations {
		oldWng := currentSpec.TinkerbellMachineConfigs[newWngConfig.MachineGroupRef.Name]
		newWng := newClusterSpec.TinkerbellMachineConfigs[newWngConfig.MachineGroupRef.Name]
		if oldWng != nil && oldWng.Spec.OSImageURL != newWng.Spec.OSImageURL {
			maxSurge := 1
			rolloutStrategy := newWngConfig.UpgradeRolloutStrategy
			if rolloutStrategy != nil && rolloutStrategy.Type == "RollingUpdate" {
				maxSurge = rolloutStrategy.RollingUpdate.MaxSurge
			}
			if err := requirements.Add(newWng.Spec.HardwareSelector, maxSurge); err != nil {
				return nil, fmt.Errorf("validating hardware requirements for worker node groups roll out: %v", err)
			}
		}
	}

	return requirements, nil
}

// PostBootstrapDeleteForUpgrade runs any provider-specific operations after the bootstrap cluster has been deleted.
func (p *Provider) PostBootstrapDeleteForUpgrade(ctx context.Context, cluster *types.Cluster) error {
	if err := p.stackInstaller.UninstallLocal(ctx); err != nil {
		return err
	}
	return nil
}

// PostBootstrapSetupUpgrade applies the hardware in the catalogue to the cluster.
func (p *Provider) PostBootstrapSetupUpgrade(ctx context.Context, clusterConfig *v1alpha1.Cluster, cluster *types.Cluster) error {
	return p.applyHardwareUpgrade(ctx, cluster)
}

// applyHardwareUpgrade adds all the hardware in the catalogue to the cluster.
func (p *Provider) applyHardwareUpgrade(ctx context.Context, cluster *types.Cluster) error {
	allHardware := p.catalogue.AllHardware()
	if len(allHardware) == 0 {
		return nil
	}

	hardwareSpec, err := hardware.MarshalCatalogue(p.catalogue)
	if err != nil {
		return fmt.Errorf("failed marshalling resources for hardware spec: %v", err)
	}
	err = p.providerKubectlClient.ApplyKubeSpecFromBytesForce(ctx, cluster, hardwareSpec)
	if err != nil {
		return fmt.Errorf("applying hardware yaml: %v", err)
	}
	return nil
}

// PostMoveManagementToBootstrap waits for baseboard management connections to become contactable
// after the management resources have been moved to the bootstrap cluster.
func (p *Provider) PostMoveManagementToBootstrap(ctx context.Context, bootstrapCluster *types.Cluster) error {
	// Check if the hardware in the catalogue has a BMCRef. Since we only allow either all
	// hardware with BMC or no hardware with BMC, it's sufficient to check the first entry.
	if p.catalogue.TotalHardware() > 0 && p.catalogue.AllHardware()[0].Spec.BMCRef != nil {
		// Wait to ensure all the new and existing baseboard management connections are valid.
		err := p.providerKubectlClient.WaitForRufioMachines(ctx, bootstrapCluster, "5m", "Contactable", constants.EksaSystemNamespace)
		if err != nil {
			return fmt.Errorf("waiting for baseboard management to be contactable: %v", err)
		}
	}
	return nil
}

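// RunPostControlPlaneUpgrade runs any provider-specific operations after the control plane has
// been upgraded. It is currently a no-op for the Tinkerbell provider.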
func (p *Provider) RunPostControlPlaneUpgrade(ctx context.Context, oldClusterSpec *cluster.Spec, clusterSpec *cluster.Spec, workloadCluster *types.Cluster, managementCluster *types.Cluster) error {
	// @TODO: do we need this for bare metal upgrade?

	// Use a retrier so that the cluster upgrade does not fail due to any intermittent failure while connecting to the kube-api server.

	// This is unfortunate, but ClusterResourceSets don't support any type of reapply of the resources they manage.
	// Even if we create a new ClusterResourceSet, if such resources already exist in the cluster, they won't be reapplied.
	// The long term solution is to add this capability to the cluster-api controller,
	// with a new mode like "ReApplyOnChanges" or "ReApplyOnCreate" vs the current "ReApplyOnce".
	/* err := p.retrier.Retry(
		func() error {
			return p.resourceSetManager.ForceUpdate(ctx, resourceSetName(clusterSpec), constants.EksaSystemNamespace, managementCluster, workloadCluster)
		},
	)
	if err != nil {
		return fmt.Errorf("failed updating the tinkerbell provider resource set post upgrade: %v", err)
	} */
	return nil
}

// ValidateNewSpec satisfies the Provider interface.
func (p *Provider) ValidateNewSpec(ctx context.Context, cluster *types.Cluster, clusterSpec *cluster.Spec) error {
	desiredClstrSpec := clusterSpec

	currentClstr, err := p.providerKubectlClient.GetEksaCluster(ctx, cluster, desiredClstrSpec.Cluster.Name)
	if err != nil {
		return err
	}

	currentDCName := currentClstr.Spec.DatacenterRef.Name
	currentDCCfg, err := p.providerKubectlClient.GetEksaTinkerbellDatacenterConfig(ctx, currentDCName, cluster.KubeconfigFile, currentClstr.Namespace)
	if err != nil {
		return err
	}

	currentWNGs := currentClstr.Spec.WorkerNodeGroupConfigurations
	desiredWNGs := desiredClstrSpec.Cluster.Spec.WorkerNodeGroupConfigurations

	err = p.validateMachineCfgsImmutability(ctx, cluster, currentClstr, desiredClstrSpec, currentWNGs, desiredWNGs)
	if err != nil {
		return err
	}

	desiredDCCfgSpec := p.datacenterConfig.Spec

	if desiredDCCfgSpec.TinkerbellIP != currentDCCfg.Spec.TinkerbellIP {
		return fmt.Errorf("spec.tinkerbellIP is immutable; previous = %s, new = %s", currentDCCfg.Spec.TinkerbellIP, desiredDCCfgSpec.TinkerbellIP)
	}

	// For any operation other than a Kubernetes version change, hookImagesURLPath is immutable.
	if currentClstr.Spec.KubernetesVersion == desiredClstrSpec.Cluster.Spec.KubernetesVersion {
		if desiredDCCfgSpec.HookImagesURLPath != currentDCCfg.Spec.HookImagesURLPath {
			return fmt.Errorf("spec.hookImagesURLPath is immutable. previous = %s, new = %s", currentDCCfg.Spec.HookImagesURLPath, desiredDCCfgSpec.HookImagesURLPath)
		}
	}

	return nil
}

func (p *Provider) validateMachineCfgsImmutability(ctx context.Context, clstr *types.Cluster, currentClstr *v1alpha1.Cluster, desiredClstrSpec *cluster.Spec, currentWNGs, desiredWNGs []v1alpha1.WorkerNodeGroupConfiguration) error {
	currentCPMachineRef := currentClstr.Spec.ControlPlaneConfiguration.MachineGroupRef
	desiredCPMachineRef := desiredClstrSpec.Cluster.Spec.ControlPlaneConfiguration.MachineGroupRef
	if !currentCPMachineRef.Equal(desiredCPMachineRef) {
		return errors.New("control plane machine config reference is immutable")
	}

	err := validateRefsUnchanged(currentWNGs, desiredWNGs)
	if err != nil {
		return err
	}

	currentMachineCfgRefsMap := p.machineConfigs

	currentWNGsSet := collection.MapSet(currentWNGs, func(v v1alpha1.WorkerNodeGroupConfiguration) string {
		return v.Name
	})

	// newWNGs contains the machine config reference names of worker node groups in the desired
	// spec that do not exist in the current spec.
	newWNGs := collection.NewSet[string]()
	for _, wng := range desiredWNGs {
		if !currentWNGsSet.Contains(wng.Name) {
			newWNGs.Add(wng.MachineGroupRef.Name)
		}
	}

	for _, machineCfgRef := range desiredClstrSpec.Cluster.MachineConfigRefs() {
		machineCfg, ok := currentMachineCfgRefsMap[machineCfgRef.Name]
		if !ok {
			return fmt.Errorf("cannot find machine config %s in tinkerbell provider machine configs", machineCfgRef.Name)
		}

		// If the machine config reference is for a new worker node group, don't bother with
		// immutability checks as we want users to be able to add worker node groups.
		if !newWNGs.Contains(machineCfgRef.Name) {
			if _, has := currentMachineCfgRefsMap[machineCfg.Name]; !has {
				return fmt.Errorf("cannot change machine config references")
			}

			err := p.validateMachineCfg(ctx, clstr, machineCfg, desiredClstrSpec)
			if err != nil {
				return err
			}
		}
	}

	return nil
}

func validateRefsUnchanged(current, desired []v1alpha1.WorkerNodeGroupConfiguration) error {
	lookup := collection.ToMap(desired, func(v v1alpha1.WorkerNodeGroupConfiguration) string {
		return v.Name
	})

	var errs []error

	// For every current worker node group that still exists in the desired config, ensure the
	// machine config is still the same.
	for _, curr := range current {
		desi, exists := lookup[curr.Name]
		if !exists {
			continue
		}

		if !curr.MachineGroupRef.Equal(desi.MachineGroupRef) {
			errs = append(errs, fmt.Errorf("%v: worker node group machine config reference is immutable", curr.Name))
		}
	}

	return kerrors.NewAggregate(errs)
}

// UpgradeNeeded currently always reports that no provider-specific upgrade is needed.
func (p *Provider) UpgradeNeeded(_ context.Context, _, _ *cluster.Spec, _ *types.Cluster) (bool, error) {
	// TODO: Figure out if something is needed here.
	return false, nil
}

func (p *Provider) hardwareCSVIsProvided() bool {
	return p.hardwareCSVFile != ""
}

func (p *Provider) isScaleUpDown(oldCluster *v1alpha1.Cluster, newCluster *v1alpha1.Cluster) bool {
	if oldCluster.Spec.ControlPlaneConfiguration.Count != newCluster.Spec.ControlPlaneConfiguration.Count {
		return true
	}

	workerNodeGroupMap := make(map[string]*v1alpha1.WorkerNodeGroupConfiguration)
	for i := range oldCluster.Spec.WorkerNodeGroupConfigurations {
		workerNodeGroupMap[oldCluster.Spec.WorkerNodeGroupConfigurations[i].Name] = &oldCluster.Spec.WorkerNodeGroupConfigurations[i]
	}

	for _, nodeGroupNewSpec := range newCluster.Spec.WorkerNodeGroupConfigurations {
		if workerNodeGrpOldSpec, ok := workerNodeGroupMap[nodeGroupNewSpec.Name]; ok {
			if *nodeGroupNewSpec.Count != *workerNodeGrpOldSpec.Count {
				return true
			}
		}
	}

	return false
}

// isRollingUpgrade reports whether the spec change requires nodes to be rolled out: a change to
// the cluster-level Kubernetes version, the datacenter OSImageURL, or any worker node group's
// Kubernetes version.
func (p *Provider) isRollingUpgrade(currentSpec, newClusterSpec *cluster.Spec) bool {
	if currentSpec.Cluster.Spec.KubernetesVersion != newClusterSpec.Cluster.Spec.KubernetesVersion ||
		currentSpec.TinkerbellDatacenter.Spec.OSImageURL != newClusterSpec.TinkerbellDatacenter.Spec.OSImageURL {
		return true
	}
	currentWNGSwithK8sVersion := WorkerNodeGroupWithK8sVersion(currentSpec)
	desiredWNGwithK8sVersion := WorkerNodeGroupWithK8sVersion(newClusterSpec)
	for wngName, K8sVersion := range desiredWNGwithK8sVersion {
		currentWngK8sVersion, ok := currentWNGSwithK8sVersion[wngName]
		if ok && (currentWngK8sVersion != K8sVersion) {
			return true
		}
	}
	return false
}

// WorkerNodeGroupWithK8sVersion maps each worker node group configuration in spec to its
// Kubernetes version.
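// Keys are machine deployment names of the form "<cluster-name>-<worker-node-group-name>".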
func WorkerNodeGroupWithK8sVersion(spec *cluster.Spec) map[string]v1alpha1.KubernetesVersion {
	WNGwithK8sVersion := make(map[string]v1alpha1.KubernetesVersion)
	K8sVersion := spec.Cluster.Spec.KubernetesVersion
	for _, wng := range spec.Cluster.Spec.WorkerNodeGroupConfigurations {
		mdName := fmt.Sprintf("%s-%s", spec.Cluster.Name, wng.Name)
		if wng.KubernetesVersion != nil {
			K8sVersion = *wng.KubernetesVersion
		}
		WNGwithK8sVersion[mdName] = K8sVersion
	}
	return WNGwithK8sVersion
}

func (p *Provider) validateMachineCfg(ctx context.Context, cluster *types.Cluster, newConfig *v1alpha1.TinkerbellMachineConfig, clusterSpec *cluster.Spec) error {
	prevMachineConfig, err := p.providerKubectlClient.GetEksaTinkerbellMachineConfig(ctx, newConfig.Name, cluster.KubeconfigFile, clusterSpec.Cluster.Namespace)
	if err != nil {
		return err
	}
	if newConfig.Spec.OSFamily != prevMachineConfig.Spec.OSFamily {
		return fmt.Errorf("spec.osFamily is immutable. Previous value %v, New value %v", prevMachineConfig.Spec.OSFamily, newConfig.Spec.OSFamily)
	}
	if newConfig.Spec.Users[0].SshAuthorizedKeys[0] != prevMachineConfig.Spec.Users[0].SshAuthorizedKeys[0] {
		return fmt.Errorf("spec.Users[0].SshAuthorizedKeys[0] is immutable. Previous value %s, New value %s", prevMachineConfig.Spec.Users[0].SshAuthorizedKeys[0], newConfig.Spec.Users[0].SshAuthorizedKeys[0])
	}
	if newConfig.Spec.Users[0].Name != prevMachineConfig.Spec.Users[0].Name {
		return fmt.Errorf("spec.Users[0].Name is immutable. Previous value %s, New value %s", prevMachineConfig.Spec.Users[0].Name, newConfig.Spec.Users[0].Name)
	}
	if !reflect.DeepEqual(newConfig.Spec.HardwareSelector, prevMachineConfig.Spec.HardwareSelector) {
		return fmt.Errorf("spec.HardwareSelector is immutable. Previous value %v, New value %v", prevMachineConfig.Spec.HardwareSelector, newConfig.Spec.HardwareSelector)
	}
	return nil
}

// PreCoreComponentsUpgrade satisfies the Provider interface.
func (p *Provider) PreCoreComponentsUpgrade(
	ctx context.Context,
	cluster *types.Cluster,
	managementComponents *cluster.ManagementComponents,
	clusterSpec *cluster.Spec,
) error {
	// When upgrading a workload cluster, the cluster object could be nil. No-op if it is.
	if cluster == nil {
		return nil
	}

	if clusterSpec == nil {
		return errors.New("cluster spec is nil")
	}

	// PreCoreComponentsUpgrade can be called for workload clusters. Ensure we only attempt to
	// upgrade the stack if we're upgrading a management cluster.
	if clusterSpec.Cluster.IsManaged() {
		return nil
	}

	// Attempt the upgrade. This should upgrade the stack in the management cluster by updating
	// images, installing new CRDs and possibly removing old ones.
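	// The steps below are: migrate off the legacy single chart if it is still installed, upgrade
	// the CRDs chart, upgrade the Tinkerbell stack chart, and finally convert any pre-release
	// Rufio BaseboardManagement resources to Machine resources.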
	// Check if the cluster has the legacy chart installed.
	hasLegacy, err := p.stackInstaller.HasLegacyChart(ctx, managementComponents.Tinkerbell, cluster.KubeconfigFile)
	if err != nil {
		return fmt.Errorf("getting legacy chart: %v", err)
	}

	if hasLegacy {
		// Upgrade the legacy chart to add the keep resource policy to the CRDs.
		err = p.stackInstaller.UpgradeLegacy(
			ctx,
			managementComponents.Tinkerbell,
			cluster.KubeconfigFile,
			stack.WithLoadBalancerEnabled(
				len(clusterSpec.Cluster.Spec.WorkerNodeGroupConfigurations) != 0 && // load balancer is handled by kube-vip in control plane nodes
					!p.datacenterConfig.Spec.SkipLoadBalancerDeployment), // configure load balancer based on datacenterConfig.Spec.SkipLoadBalancerDeployment
		)
		if err != nil {
			return fmt.Errorf("upgrading legacy chart: %v", err)
		}

		// Uninstall the legacy chart.
		err = p.stackInstaller.Uninstall(
			ctx,
			managementComponents.Tinkerbell,
			cluster.KubeconfigFile,
		)
		if err != nil {
			return fmt.Errorf("uninstalling legacy chart: %v", err)
		}

		// Annotate existing CRDs so they can be adopted by the new CRDs chart.
		err = p.annotateCRDs(ctx, cluster)
		if err != nil {
			return fmt.Errorf("annotating crds: %v", err)
		}
	}

	// Upgrade/install the CRDs chart.
	err = p.stackInstaller.UpgradeInstallCRDs(
		ctx,
		managementComponents.Tinkerbell,
		cluster.KubeconfigFile,
	)
	if err != nil {
		return fmt.Errorf("upgrading crds chart: %v", err)
	}

	// Upgrade/install the tink stack chart.
	err = p.stackInstaller.Upgrade(
		ctx,
		managementComponents.Tinkerbell,
		p.datacenterConfig.Spec.TinkerbellIP,
		cluster.KubeconfigFile,
		p.datacenterConfig.Spec.HookImagesURLPath,
		stack.WithLoadBalancerInterface(p.datacenterConfig.Spec.LoadBalancerInterface),
		stack.WithBootsOnKubernetes(),
		stack.WithStackServiceEnabled(true),
		stack.WithDHCPRelayEnabled(true),
		stack.WithLoadBalancerEnabled(
			len(clusterSpec.Cluster.Spec.WorkerNodeGroupConfigurations) != 0 && // load balancer is handled by kube-vip in control plane nodes
				!p.datacenterConfig.Spec.SkipLoadBalancerDeployment), // configure load balancer based on datacenterConfig.Spec.SkipLoadBalancerDeployment
		stack.WithHookIsoOverride(p.datacenterConfig.Spec.HookIsoURL),
	)
	if err != nil {
		return fmt.Errorf("upgrading stack: %v", err)
	}

	hasBaseboardManagement, err := p.providerKubectlClient.HasCRD(
		ctx,
		rufiounreleased.BaseboardManagementResourceName,
		cluster.KubeconfigFile,
	)
	if err != nil {
		return fmt.Errorf("upgrading rufio crds: %v", err)
	}

	// We introduced the Rufio dependency prior to its initial release. Between its introduction
	// and its official release, breaking changes occurred to the CRDs. We're using the presence
	// of the obsolete BaseboardManagement CRD to determine if there's an old Rufio installed.
	// If there is, we need to convert all obsolete BaseboardManagement CRs to Machine CRs (the
	// CRD that supersedes BaseboardManagement).
	if hasBaseboardManagement {
		if err := p.handleRufioUnreleasedCRDs(ctx, cluster); err != nil {
			return fmt.Errorf("upgrading rufio crds: %v", err)
		}

		// Remove the unreleased Rufio CRDs from the cluster; this will also remove any residual
		// resources.
		err = p.providerKubectlClient.DeleteCRD(
			ctx,
			rufiounreleased.BaseboardManagementResourceName,
			cluster.KubeconfigFile,
		)
		if err != nil {
			return fmt.Errorf("could not delete machines crd: %v", err)
		}
	}

	return nil
}

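// handleRufioUnreleasedCRDs converts pre-release Rufio BaseboardManagement resources to Machine
// resources and re-points Hardware BMCRefs at the new Machine kind.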
func (p *Provider) handleRufioUnreleasedCRDs(ctx context.Context, cluster *types.Cluster) error {
	// Firstly, retrieve all BaseboardManagement CRs and convert them to Machine CRs.
	bm, err := p.providerKubectlClient.AllBaseboardManagements(
		ctx,
		cluster.KubeconfigFile,
	)
	if err != nil {
		return fmt.Errorf("retrieving baseboardmanagement resources: %v", err)
	}

	if len(bm) > 0 {
		serialized, err := yaml.Serialize(toRufioMachines(bm)...)
		if err != nil {
			return fmt.Errorf("serializing machines: %v", err)
		}

		err = p.providerKubectlClient.ApplyKubeSpecFromBytesWithNamespace(
			ctx,
			cluster,
			yaml.Join(serialized),
			p.stackInstaller.GetNamespace(),
		)
		if err != nil {
			return fmt.Errorf("applying machines: %v", err)
		}
	}

	// Secondly, iterate over all Hardware CRs and update the BMCRef to point to the new Machine
	// CR.
	hardware, err := p.providerKubectlClient.AllTinkerbellHardware(ctx, cluster.KubeconfigFile)
	if err != nil {
		return fmt.Errorf("retrieving hardware resources: %v", err)
	}

	var updatedHardware []tinkv1alpha1.Hardware
	for _, h := range hardware {
		if h.Spec.BMCRef != nil {
			h.Spec.BMCRef.Kind = "Machine"
			updatedHardware = append(updatedHardware, h)
		}
	}

	if len(updatedHardware) > 0 {
		serialized, err := yaml.Serialize(updatedHardware...)
		if err != nil {
			return fmt.Errorf("serializing hardware: %v", err)
		}

		err = p.providerKubectlClient.ApplyKubeSpecFromBytesForce(ctx, cluster, yaml.Join(serialized))
		if err != nil {
			return fmt.Errorf("applying hardware: %v", err)
		}
	}

	return nil
}

// toRufioMachines converts pre-release BaseboardManagement resources into the released Rufio
// Machine type, carrying over the object metadata and BMC connection details.
func toRufioMachines(items []rufiounreleased.BaseboardManagement) []rufiov1.Machine {
	var machines []rufiov1.Machine
	for _, item := range items {
		machines = append(machines, rufiov1.Machine{
			// We need to populate type meta because we apply with kubectl (leakage).
			TypeMeta: metav1.TypeMeta{
				Kind:       "Machine",
				APIVersion: rufiov1.GroupVersion.String(),
			},
			ObjectMeta: item.ObjectMeta,
			Spec: rufiov1.MachineSpec{
				Connection: rufiov1.Connection{
					AuthSecretRef: item.Spec.Connection.AuthSecretRef,
					Host:          item.Spec.Connection.Host,
					Port:          item.Spec.Connection.Port,
					InsecureTLS:   item.Spec.Connection.InsecureTLS,
				},
			},
		})
	}
	return machines
}

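// annotateCRDs adds Helm release ownership annotations to the Tinkerbell and Rufio CRDs so the
// tinkerbell-crds chart can adopt them.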
func (p *Provider) annotateCRDs(ctx context.Context, cluster *types.Cluster) error {
	annotation := map[string]string{
		"meta.helm.sh/release-name":      "tinkerbell-crds",
		"meta.helm.sh/release-namespace": "eksa-system",
	}

	// machine
	err := p.providerKubectlClient.UpdateAnnotation(ctx, "customresourcedefinition", "machines.bmc.tinkerbell.org", annotation, executables.WithCluster(cluster), executables.WithOverwrite())
	if err != nil {
		return fmt.Errorf("annotating rufio machines: %v", err)
	}

	// task
	err = p.providerKubectlClient.UpdateAnnotation(ctx, "customresourcedefinition", "tasks.bmc.tinkerbell.org", annotation, executables.WithCluster(cluster), executables.WithOverwrite())
	if err != nil {
		return fmt.Errorf("annotating rufio tasks: %v", err)
	}

	// job
	err = p.providerKubectlClient.UpdateAnnotation(ctx, "customresourcedefinition", "jobs.bmc.tinkerbell.org", annotation, executables.WithCluster(cluster), executables.WithOverwrite())
	if err != nil {
		return fmt.Errorf("annotating rufio jobs: %v", err)
	}

	// hardware
	err = p.providerKubectlClient.UpdateAnnotation(ctx, "customresourcedefinition", "hardware.tinkerbell.org", annotation, executables.WithCluster(cluster), executables.WithOverwrite())
	if err != nil {
		return fmt.Errorf("annotating tinkerbell hardware: %v", err)
	}

	// template
	err = p.providerKubectlClient.UpdateAnnotation(ctx, "customresourcedefinition", "templates.tinkerbell.org", annotation, executables.WithCluster(cluster), executables.WithOverwrite())
	if err != nil {
		return fmt.Errorf("annotating tinkerbell templates: %v", err)
	}

	// workflow
	err = p.providerKubectlClient.UpdateAnnotation(ctx, "customresourcedefinition", "workflows.tinkerbell.org", annotation, executables.WithCluster(cluster), executables.WithOverwrite())
	if err != nil {
		return fmt.Errorf("annotating tinkerbell workflows: %v", err)
	}

	return nil
}