pkg/providers/tinkerbell/assert.go
package tinkerbell
import (
"errors"
"fmt"
"net/http"

controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1"

"github.com/aws/eks-anywhere/pkg/api/v1alpha1"
tinkerbellv1 "github.com/aws/eks-anywhere/pkg/api/v1alpha1/thirdparty/tinkerbell/capt/v1beta1"
"github.com/aws/eks-anywhere/pkg/clusterapi"
"github.com/aws/eks-anywhere/pkg/networkutils"
"github.com/aws/eks-anywhere/pkg/providers/tinkerbell/hardware"
)
// TODO(chrisdoherty) Add worker node group assertions
// AssertMachineConfigsValid iterates over all machine configs in the spec, validating each one.
func AssertMachineConfigsValid(spec *ClusterSpec) error {
for _, config := range spec.MachineConfigs {
if err := config.Validate(); err != nil {
return err
}
}
return nil
}
// AssertDatacenterConfigValid asserts the DatacenterConfig in spec is valid.
func AssertDatacenterConfigValid(spec *ClusterSpec) error {
return spec.DatacenterConfig.Validate()
}
// AssertMachineConfigNamespaceMatchesDatacenterConfig ensures all machine configuration instances
// are configured with the same namespace as the provider specific data center configuration
// namespace.
func AssertMachineConfigNamespaceMatchesDatacenterConfig(spec *ClusterSpec) error {
return validateMachineConfigNamespacesMatchDatacenterConfig(spec.DatacenterConfig, spec.MachineConfigs)
}
// AssertControlPlaneMachineRefExists ensures the control plane machine ref is referencing a
// known machine config.
func AssertControlPlaneMachineRefExists(spec *ClusterSpec) error {
controlPlaneMachineRef := spec.Cluster.Spec.ControlPlaneConfiguration.MachineGroupRef
if err := validateMachineRefExists(controlPlaneMachineRef, spec.MachineConfigs); err != nil {
return fmt.Errorf("control plane configuration machine ref: %v", err)
}
return nil
}
// AssertEtcdMachineRefExists ensures that, if the etcd configuration is specified, it references
// a known machine config.
func AssertEtcdMachineRefExists(spec *ClusterSpec) error {
// Unstacked etcd is optional.
if spec.Cluster.Spec.ExternalEtcdConfiguration == nil {
return nil
}
etcdMachineRef := spec.Cluster.Spec.ExternalEtcdConfiguration.MachineGroupRef
if err := validateMachineRefExists(etcdMachineRef, spec.MachineConfigs); err != nil {
return fmt.Errorf("external etcd configuration machine group ref: %v", err)
}
return nil
}
// AssertWorkerNodeGroupMachineRefsExists ensures all worker node group machine refs are
// referencing a known machine config.
func AssertWorkerNodeGroupMachineRefsExists(spec *ClusterSpec) error {
for _, group := range spec.Cluster.Spec.WorkerNodeGroupConfigurations {
groupRef := group.MachineGroupRef
if err := validateMachineRefExists(groupRef, spec.MachineConfigs); err != nil {
return fmt.Errorf("worker node group configuration machine group ref: %v", err)
}
}
return nil
}
// AssertK8SVersionNot120 ensures Kubernetes version is not set to v1.20.
func AssertK8SVersionNot120(spec *ClusterSpec) error {
if spec.Cluster.Spec.KubernetesVersion == v1alpha1.Kube120 {
return errors.New("kubernetes version v1.20 is not supported for Bare Metal")
}
return nil
}
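// AssertOsFamilyValid ensures the OS family configured on the machine configs is valid.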
func AssertOsFamilyValid(spec *ClusterSpec) error {
return validateOsFamily(spec)
}
// AssertUpgradeRolloutStrategyValid ensures that the upgrade rollout strategy is valid for both CP and worker node configurations.
func AssertUpgradeRolloutStrategyValid(spec *ClusterSpec) error {
return validateUpgradeRolloutStrategy(spec)
}
// AssertAutoScalerDisabledForInPlace ensures that the autoscaler configuration is not enabled when upgrade rollout strategy is InPlace.
func AssertAutoScalerDisabledForInPlace(spec *ClusterSpec) error {
return validateAutoScalerDisabledForInPlace(spec)
}
// AssertOSImageURL ensures that OSImageURL is set either at the datacenter config level or on every machine config, but not at both levels.
func AssertOSImageURL(spec *ClusterSpec) error {
return validateOSImageURL(spec)
}
// AssertISOURL ensures that the ISO URL, when set, is in the file format expected by the smee deployment.
func AssertISOURL(spec *ClusterSpec) error {
return validateISOURL(spec)
}
// NewIPNotInUseAssertion returns an assertion that ensures the control plane endpoint host isn't in use.
// The check may be unreliable due to its implementation.
func NewIPNotInUseAssertion(client networkutils.NetClient) ClusterSpecAssertion {
return func(spec *ClusterSpec) error {
ip := spec.Cluster.Spec.ControlPlaneConfiguration.Endpoint.Host
if err := validateIPUnused(client, ip); err != nil {
return fmt.Errorf("control plane endpoint ip in use: %v", ip)
}
return nil
}
}
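// Usage sketch (hypothetical caller; the concrete NetClient implementation shown here is an assumption,
// not necessarily how the validator wires it up):
//
//	assertion := NewIPNotInUseAssertion(&networkutils.DefaultNetClient{})
//	if err := assertion(clusterSpec); err != nil {
//		// The control plane endpoint IP responded, so it is already taken.
//	}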
// AssertTinkerbellIPNotInUse ensures the Tinkerbell IP isn't in use.
func AssertTinkerbellIPNotInUse(client networkutils.NetClient) ClusterSpecAssertion {
return func(spec *ClusterSpec) error {
ip := spec.DatacenterConfig.Spec.TinkerbellIP
if err := validateIPUnused(client, ip); err != nil {
return fmt.Errorf("tinkerbellIP <%s> is already in use, please provide a unique IP", ip)
}
return nil
}
}
// AssertTinkerbellIPAndControlPlaneIPNotSame ensures the Tinkerbell IP and the control plane IP are not the same.
func AssertTinkerbellIPAndControlPlaneIPNotSame(spec *ClusterSpec) error {
tinkerbellIP := spec.DatacenterConfig.Spec.TinkerbellIP
controlPlaneIP := spec.Cluster.Spec.ControlPlaneConfiguration.Endpoint.Host
if tinkerbellIP == controlPlaneIP {
return fmt.Errorf("controlPlaneConfiguration.endpoint.host and tinkerbellIP are the same (%s), please provide two unique IPs", tinkerbellIP)
}
return nil
}
// AssertHookRetrievableWithoutProxy ensures the executing machine can retrieve Hook
// from the host URL without a proxy configured. It does not guarantee the target node
// will be able to download Hook.
func AssertHookRetrievableWithoutProxy(spec *ClusterSpec) error {
if spec.Cluster.Spec.ProxyConfiguration == nil {
return nil
}
// Return an error if the hookImagesURLPath field is not specified when a proxy configuration is set.
if spec.DatacenterConfig.Spec.HookImagesURLPath == "" {
return fmt.Errorf("locally hosted hookImagesURLPath is required to support ProxyConfiguration")
}
// verify hookImagesURLPath is accessible locally too
transport := http.DefaultTransport.(*http.Transport).Clone()
transport.Proxy = nil
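// With Proxy set to nil the client ignores any HTTP(S)_PROXY environment settings, so the request below must succeed over a direct connection.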
client := &http.Client{
Transport: transport,
}
resp, err := client.Get(spec.DatacenterConfig.Spec.HookImagesURLPath)
if err != nil {
return fmt.Errorf("HookImagesURLPath: %s needs to be hosted locally while specifiying Proxy configuration: %v", spec.DatacenterConfig.Spec.HookImagesURLPath, err)
}
defer resp.Body.Close()
return nil
}
// AssertPortsNotInUse ensures that ports 80, 42113, and 50061 are available.
func AssertPortsNotInUse(client networkutils.NetClient) ClusterSpecAssertion {
return func(spec *ClusterSpec) error {
host := "0.0.0.0"
if err := validatePortsAvailable(client, host); err != nil {
return err
}
return nil
}
}
// HardwareSatisfiesOnlyOneSelectorAssertion ensures each hardware entry in the catalogue satisfies only one
// of the MachineConfigs' HardwareSelectors from the spec.
func HardwareSatisfiesOnlyOneSelectorAssertion(catalogue *hardware.Catalogue) ClusterSpecAssertion {
return func(spec *ClusterSpec) error {
selectors, err := selectorsFromClusterSpec(spec)
if err != nil {
return err
}
return validateHardwareSatisfiesOnlyOneSelector(catalogue.AllHardware(), selectors)
}
}
// selectorsFromClusterSpec extracts all selectors specified on MachineConfigs in the spec.
func selectorsFromClusterSpec(spec *ClusterSpec) (selectorSet, error) {
selectors := selectorSet{}
if err := selectors.Add(spec.ControlPlaneMachineConfig().Spec.HardwareSelector); err != nil {
return nil, err
}
for _, nodeGroup := range spec.WorkerNodeGroupConfigurations() {
err := selectors.Add(spec.WorkerNodeGroupMachineConfig(nodeGroup).Spec.HardwareSelector)
if err != nil {
return nil, err
}
}
if spec.HasExternalEtcd() {
if err := selectors.Add(spec.ExternalEtcdMachineConfig().Spec.HardwareSelector); err != nil {
return nil, err
}
}
return selectors, nil
}
// MinimumHardwareAvailableAssertionForCreate asserts that catalogue has sufficient hardware to
// support the ClusterSpec during a create workflow.
//
// It does not protect against intersections or subsets, so consumers should ensure a 1:1
// mapping between catalogue hardware and selectors.
func MinimumHardwareAvailableAssertionForCreate(catalogue *hardware.Catalogue) ClusterSpecAssertion {
return func(spec *ClusterSpec) error {
// Without Hardware selectors we get undesirable behavior so ensure we have them for
// all MachineConfigs.
if err := ensureHardwareSelectorsSpecified(spec); err != nil {
return err
}
// Build a set of required hardware counts per machine group. minimumHardwareRequirements
// will account for the same selector being specified on different groups.
requirements := MinimumHardwareRequirements{}
err := requirements.Add(
spec.ControlPlaneMachineConfig().Spec.HardwareSelector,
spec.ControlPlaneConfiguration().Count,
)
if err != nil {
return err
}
for _, nodeGroup := range spec.WorkerNodeGroupConfigurations() {
err := requirements.Add(
spec.WorkerNodeGroupMachineConfig(nodeGroup).Spec.HardwareSelector,
*nodeGroup.Count,
)
if err != nil {
return err
}
}
if spec.HasExternalEtcd() {
err := requirements.Add(
spec.ExternalEtcdMachineConfig().Spec.HardwareSelector,
spec.ExternalEtcdConfiguration().Count,
)
if err != nil {
return err
}
}
return validateMinimumHardwareRequirements(requirements, catalogue)
}
}
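// Composition sketch (hypothetical wiring; the real validator assembly lives elsewhere in this package):
//
//	assertions := []ClusterSpecAssertion{
//		AssertMachineConfigsValid,
//		AssertDatacenterConfigValid,
//		MinimumHardwareAvailableAssertionForCreate(catalogue),
//	}
//	for _, assert := range assertions {
//		if err := assert(spec); err != nil {
//			return err
//		}
//	}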
// WorkerNodeHardware holds the machine deployment name and replica count for a Tinkerbell worker node group.
type WorkerNodeHardware struct {
MachineDeploymentName string
Replicas int
}
// ValidatableCluster allows assertions to pull worker node and control plane information.
type ValidatableCluster interface {
// WorkerNodeHardwareGroups retrieves a list of WorkerNodeHardware entries containing the MachineDeployment
// name and replica count for each worker node group of a ValidatableCluster.
WorkerNodeHardwareGroups() []WorkerNodeHardware
// ControlPlaneReplicaCount retrieves the control plane replica count of the ValidatableCluster.
ControlPlaneReplicaCount() int
// ClusterK8sVersion retrieves the cluster-level Kubernetes version.
ClusterK8sVersion() v1alpha1.KubernetesVersion
// WorkerNodeGroupK8sVersion maps each worker node group to its Kubernetes version.
WorkerNodeGroupK8sVersion() map[string]v1alpha1.KubernetesVersion
}
// ValidatableTinkerbellClusterSpec wraps around the Tinkerbell ClusterSpec as a ValidatableCluster.
type ValidatableTinkerbellClusterSpec struct {
*ClusterSpec
}
// ControlPlaneReplicaCount retrieves the ValidatableTinkerbellClusterSpec control plane replica count.
func (v *ValidatableTinkerbellClusterSpec) ControlPlaneReplicaCount() int {
return v.Cluster.Spec.ControlPlaneConfiguration.Count
}
// WorkerNodeHardwareGroups retrieves a list of WorkerNodeHardwares for a ValidatableTinkerbellClusterSpec.
func (v *ValidatableTinkerbellClusterSpec) WorkerNodeHardwareGroups() []WorkerNodeHardware {
workerNodeGroupConfigs := make([]WorkerNodeHardware, 0, len(v.Cluster.Spec.WorkerNodeGroupConfigurations))
for _, workerNodeGroup := range v.Cluster.Spec.WorkerNodeGroupConfigurations {
workerNodeGroupConfig := &WorkerNodeHardware{
MachineDeploymentName: machineDeploymentName(v.Cluster.Name, workerNodeGroup.Name),
Replicas: *workerNodeGroup.Count,
}
workerNodeGroupConfigs = append(workerNodeGroupConfigs, *workerNodeGroupConfig)
}
return workerNodeGroupConfigs
}
// ClusterK8sVersion retrieves the Kubernetes version set at the cluster level.
func (v *ValidatableTinkerbellClusterSpec) ClusterK8sVersion() v1alpha1.KubernetesVersion {
return v.Cluster.Spec.KubernetesVersion
}
// WorkerNodeGroupK8sVersion returns each worker node group with its associated Kubernetes version.
func (v *ValidatableTinkerbellClusterSpec) WorkerNodeGroupK8sVersion() map[string]v1alpha1.KubernetesVersion {
return WorkerNodeGroupWithK8sVersion(v.ClusterSpec.Spec)
}
// ValidatableTinkerbellCAPI wraps the Tinkerbell control plane and worker CAPI objects as a ValidatableCluster.
type ValidatableTinkerbellCAPI struct {
KubeadmControlPlane *controlplanev1.KubeadmControlPlane
WorkerGroups []*clusterapi.WorkerGroup[*tinkerbellv1.TinkerbellMachineTemplate]
}
// ControlPlaneReplicaCount retrieves the ValidatableTinkerbellCAPI control plane replica count.
func (v *ValidatableTinkerbellCAPI) ControlPlaneReplicaCount() int {
return int(*v.KubeadmControlPlane.Spec.Replicas)
}
// WorkerNodeHardwareGroups retrieves a list of WorkerNodeHardwares for a ValidatableTinkerbellCAPI.
func (v *ValidatableTinkerbellCAPI) WorkerNodeHardwareGroups() []WorkerNodeHardware {
workerNodeHardwareList := make([]WorkerNodeHardware, 0, len(v.WorkerGroups))
for _, workerGroup := range v.WorkerGroups {
workerNodeHardware := &WorkerNodeHardware{
MachineDeploymentName: workerGroup.MachineDeployment.Name,
Replicas: int(*workerGroup.MachineDeployment.Spec.Replicas),
}
workerNodeHardwareList = append(workerNodeHardwareList, *workerNodeHardware)
}
return workerNodeHardwareList
}
// ClusterK8sVersion returns the Kubernetes version in major.minor format for a ValidatableTinkerbellCAPI.
func (v *ValidatableTinkerbellCAPI) ClusterK8sVersion() v1alpha1.KubernetesVersion {
return v.toK8sVersion(v.KubeadmControlPlane.Spec.Version)
}
// WorkerNodeGroupK8sVersion returns each worker node group mapped to Kubernetes version in major.minor format for a ValidatableTinkerbellCAPI.
func (v *ValidatableTinkerbellCAPI) WorkerNodeGroupK8sVersion() map[string]v1alpha1.KubernetesVersion {
wngK8sversion := make(map[string]v1alpha1.KubernetesVersion)
for _, wng := range v.WorkerGroups {
k8sVersion := v.toK8sVersion(*wng.MachineDeployment.Spec.Template.Spec.Version)
wngK8sversion[wng.MachineDeployment.Name] = k8sVersion
}
return wngK8sversion
}
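// toK8sVersion extracts the major.minor portion of a CAPI version string by slicing characters 1-4,
// e.g. "v1.28.3" yields "1.28"; it assumes a "vMAJOR.MINOR" prefix with a two-digit minor version.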
func (v *ValidatableTinkerbellCAPI) toK8sVersion(k8sversion string) v1alpha1.KubernetesVersion {
kubeVersion := v1alpha1.KubernetesVersion(k8sversion[1:5])
return kubeVersion
}
// AssertionsForScaleUpDown asserts that the catalogue has sufficient hardware to
// support scaling up/down from the current ValidatableCluster to the desired ClusterSpec.
// nolint:gocyclo // TODO: Reduce cyclomatic complexity https://github.com/aws/eks-anywhere-internal/issues/1186
func AssertionsForScaleUpDown(catalogue *hardware.Catalogue, current ValidatableCluster, rollingUpgrade bool) ClusterSpecAssertion {
return func(spec *ClusterSpec) error {
// Without Hardware selectors we get undesirable behavior so ensure we have them for
// all MachineConfigs.
if err := ensureHardwareSelectorsSpecified(spec); err != nil {
return err
}
if spec.HasExternalEtcd() {
return fmt.Errorf("scale up/down not supported for external etcd")
}
// Build a set of required hardware counts per machine group. minimumHardwareRequirements
// will account for the same selector being specified on different groups.
requirements := MinimumHardwareRequirements{}
if current.ControlPlaneReplicaCount() != spec.Cluster.Spec.ControlPlaneConfiguration.Count {
if rollingUpgrade {
return fmt.Errorf("cannot perform scale up or down during rolling upgrades")
}
if current.ControlPlaneReplicaCount() < spec.Cluster.Spec.ControlPlaneConfiguration.Count {
err := requirements.Add(
spec.ControlPlaneMachineConfig().Spec.HardwareSelector,
spec.Cluster.Spec.ControlPlaneConfiguration.Count-current.ControlPlaneReplicaCount(),
)
if err != nil {
return fmt.Errorf("error during scale up: %v", err)
}
}
}
workerNodeHardwareMap := make(map[string]WorkerNodeHardware)
for _, workerNodeHardware := range current.WorkerNodeHardwareGroups() {
workerNodeHardwareMap[workerNodeHardware.MachineDeploymentName] = workerNodeHardware
}
for _, nodeGroupNewSpec := range spec.Cluster.Spec.WorkerNodeGroupConfigurations {
nodeGroupMachineDeploymentNameNewSpec := machineDeploymentName(spec.Cluster.Name, nodeGroupNewSpec.Name)
if workerNodeGroupOldSpec, ok := workerNodeHardwareMap[nodeGroupMachineDeploymentNameNewSpec]; ok {
if *nodeGroupNewSpec.Count != workerNodeGroupOldSpec.Replicas {
if rollingUpgrade {
return fmt.Errorf("cannot perform scale up or down during rolling upgrades")
}
if *nodeGroupNewSpec.Count > workerNodeGroupOldSpec.Replicas {
err := requirements.Add(
spec.WorkerNodeGroupMachineConfig(nodeGroupNewSpec).Spec.HardwareSelector,
*nodeGroupNewSpec.Count-workerNodeGroupOldSpec.Replicas,
)
if err != nil {
return fmt.Errorf("error during scale up: %v", err)
}
}
}
} else { // worker node group was newly added
if rollingUpgrade {
return fmt.Errorf("cannot perform scale up or down during rolling upgrades")
}
err := requirements.Add(
spec.WorkerNodeGroupMachineConfig(nodeGroupNewSpec).Spec.HardwareSelector,
*nodeGroupNewSpec.Count,
)
if err != nil {
return fmt.Errorf("error during scale up: %v", err)
}
}
}
if err := validateMinimumHardwareRequirements(requirements, catalogue); err != nil {
return fmt.Errorf("for scale up, %v", err)
}
return nil
}
}
// ExtraHardwareAvailableAssertionForRollingUpgrade asserts that the catalogue has sufficient hardware to
// support the ClusterSpec during a rolling upgrade workflow.
func ExtraHardwareAvailableAssertionForRollingUpgrade(catalogue *hardware.Catalogue, current ValidatableCluster, eksaVersionUpgrade bool) ClusterSpecAssertion {
return func(spec *ClusterSpec) error {
// Without Hardware selectors we get undesirable behavior so ensure we have them for
// all MachineConfigs.
if err := ensureHardwareSelectorsSpecified(spec); err != nil {
return err
}
// Build a set of required hardware counts per machine group. minimumHardwareRequirements
// will account for the same selector being specified on different groups.
requirements := MinimumHardwareRequirements{}
if spec.Cluster.Spec.KubernetesVersion != current.ClusterK8sVersion() || eksaVersionUpgrade {
if err := ensureCPHardwareAvailability(spec, requirements); err != nil {
return err
}
}
if err := ensureWorkerHardwareAvailability(spec, current, requirements, eksaVersionUpgrade); err != nil {
return err
}
if spec.HasExternalEtcd() {
return fmt.Errorf("external etcd upgrade is not supported")
}
if err := validateMinimumHardwareRequirements(requirements, catalogue); err != nil {
return fmt.Errorf("for rolling upgrade, %v", err)
}
return nil
}
}
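// ensureCPHardwareAvailability registers the extra control plane hardware needed for a rolling upgrade:
// MaxSurge machines when a RollingUpdate strategy is configured, otherwise one.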
func ensureCPHardwareAvailability(spec *ClusterSpec, hwReq MinimumHardwareRequirements) error {
maxSurge := 1
rolloutStrategy := spec.Cluster.Spec.ControlPlaneConfiguration.UpgradeRolloutStrategy
if rolloutStrategy != nil && rolloutStrategy.Type == "RollingUpdate" {
maxSurge = spec.Cluster.Spec.ControlPlaneConfiguration.UpgradeRolloutStrategy.RollingUpdate.MaxSurge
}
err := hwReq.Add(
spec.ControlPlaneMachineConfig().Spec.HardwareSelector,
maxSurge,
)
if err != nil {
return fmt.Errorf("for rolling upgrade, %v", err)
}
return nil
}
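// ensureWorkerHardwareAvailability registers the extra worker hardware needed for a rolling upgrade of each
// worker node group whose Kubernetes version is changing (or when the EKS-A version is being upgraded):
// MaxSurge machines when a RollingUpdate strategy is configured, otherwise one.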
func ensureWorkerHardwareAvailability(spec *ClusterSpec, current ValidatableCluster, hwReq MinimumHardwareRequirements, eksaVersionUpgrade bool) error {
currentWngK8sversion := current.WorkerNodeGroupK8sVersion()
desiredWngK8sVersion := WorkerNodeGroupWithK8sVersion(spec.Spec)
for _, nodeGroup := range spec.WorkerNodeGroupConfigurations() {
maxSurge := 1
// As rolling upgrades and scale up/down are not permitted in a single operation, it's safe to access the map directly using the machine deployment name.
mdName := fmt.Sprintf("%s-%s", spec.Cluster.Name, nodeGroup.Name)
if currentWngK8sversion[mdName] != desiredWngK8sVersion[mdName] || eksaVersionUpgrade {
if nodeGroup.UpgradeRolloutStrategy != nil && nodeGroup.UpgradeRolloutStrategy.Type == "RollingUpdate" {
maxSurge = nodeGroup.UpgradeRolloutStrategy.RollingUpdate.MaxSurge
}
err := hwReq.Add(
spec.WorkerNodeGroupMachineConfig(nodeGroup).Spec.HardwareSelector,
maxSurge,
)
if err != nil {
return fmt.Errorf("for rolling upgrade, %v", err)
}
}
}
return nil
}
// ensureHardwareSelectorsSpecified ensures each machine config present in spec has a hardware
// selector.
func ensureHardwareSelectorsSpecified(spec *ClusterSpec) error {
if len(spec.ControlPlaneMachineConfig().Spec.HardwareSelector) == 0 {
return missingHardwareSelectorErr{
Name: spec.ControlPlaneMachineConfig().Name,
}
}
for _, nodeGroup := range spec.WorkerNodeGroupConfigurations() {
if len(spec.WorkerNodeGroupMachineConfig(nodeGroup).Spec.HardwareSelector) == 0 {
return missingHardwareSelectorErr{
Name: spec.WorkerNodeGroupMachineConfig(nodeGroup).Name,
}
}
}
if spec.HasExternalEtcd() {
if len(spec.ExternalEtcdMachineConfig().Spec.HardwareSelector) == 0 {
return missingHardwareSelectorErr{
Name: spec.ExternalEtcdMachineConfig().Name,
}
}
}
return nil
}
// ExtraHardwareAvailableAssertionForNodeRollOut asserts that the catalogue has sufficient hardware to meet the
// minimum requirements; it is agnostic to whether the hardware is for control plane or worker nodes.
func ExtraHardwareAvailableAssertionForNodeRollOut(catalogue *hardware.Catalogue, hwReq MinimumHardwareRequirements) ClusterSpecAssertion {
return func(_ *ClusterSpec) error {
if err := validateMinimumHardwareRequirements(hwReq, catalogue); err != nil {
return fmt.Errorf("for node rollout, %v", err)
}
return nil
}
}
type missingHardwareSelectorErr struct {
Name string
}
func (e missingHardwareSelectorErr) Error() string {
return fmt.Sprintf("missing hardware selector for %v", e.Name)
}