func()

in upup/pkg/fi/cloudup/apply_cluster.go [146:771]


func (c *ApplyClusterCmd) Run(ctx context.Context) error {
	if c.TargetName == TargetTerraform {
		found := false
		for _, cp := range TerraformCloudProviders {
			if c.Cloud.ProviderID() == cp {
				found = true
				break
			}
		}
		if !found {
			return fmt.Errorf("cloud provider %v does not support the terraform target", c.Cloud.ProviderID())
		}
	}
	if c.InstanceGroups == nil {
		list, err := c.Clientset.InstanceGroupsFor(c.Cluster).List(ctx, metav1.ListOptions{})
		if err != nil {
			return err
		}
		var instanceGroups []*kops.InstanceGroup
		for i := range list.Items {
			instanceGroups = append(instanceGroups, &list.Items[i])
		}
		c.InstanceGroups = instanceGroups
	}

	for _, ig := range c.InstanceGroups {
		// Try to guess the path for additional third party volume plugins in Flatcar
		image := strings.ToLower(ig.Spec.Image)
		if strings.Contains(image, "flatcar") {
			if c.Cluster.Spec.Kubelet == nil {
				c.Cluster.Spec.Kubelet = &kops.KubeletConfigSpec{}
			}
			if c.Cluster.Spec.Kubelet.VolumePluginDirectory == "" {
				c.Cluster.Spec.Kubelet.VolumePluginDirectory = "/var/lib/kubelet/volumeplugins/"
			}
		}
	}

	channel, err := ChannelForCluster(c.Cluster)
	if err != nil {
		klog.Warningf("%v", err)
	}
	c.channel = channel

	securityLifecycle := fi.LifecycleSync
	networkLifecycle := fi.LifecycleSync
	clusterLifecycle := fi.LifecycleSync

	switch c.Phase {
	case Phase(""):
		// Everything ... the default

	case PhaseNetwork:
		securityLifecycle = fi.LifecycleIgnore
		clusterLifecycle = fi.LifecycleIgnore

	case PhaseSecurity:
		networkLifecycle = fi.LifecycleExistsAndWarnIfChanges
		clusterLifecycle = fi.LifecycleIgnore

	case PhaseCluster:
		if c.TargetName == TargetDryRun {
			securityLifecycle = fi.LifecycleExistsAndWarnIfChanges
			networkLifecycle = fi.LifecycleExistsAndWarnIfChanges
		} else {
			networkLifecycle = fi.LifecycleExistsAndValidates
			securityLifecycle = fi.LifecycleExistsAndValidates
		}

	default:
		return fmt.Errorf("unknown phase %q", c.Phase)
	}
	if c.GetAssets {
		networkLifecycle = fi.LifecycleIgnore
		securityLifecycle = fi.LifecycleIgnore
		clusterLifecycle = fi.LifecycleIgnore
	}

	assetBuilder := assets.NewAssetBuilder(c.Cluster, c.GetAssets)
	err = c.upgradeSpecs(assetBuilder)
	if err != nil {
		return err
	}

	err = c.validateKopsVersion()
	if err != nil {
		return err
	}

	err = c.validateKubernetesVersion()
	if err != nil {
		return err
	}

	cluster := c.Cluster

	configBase, err := vfs.Context.BuildVfsPath(cluster.Spec.ConfigBase)
	if err != nil {
		return fmt.Errorf("error parsing config base %q: %v", cluster.Spec.ConfigBase, err)
	}

	if !c.AllowKopsDowngrade {
		kopsVersionUpdatedBytes, err := configBase.Join(registry.PathKopsVersionUpdated).ReadFile()
		if err == nil {
			kopsVersionUpdated := strings.TrimSpace(string(kopsVersionUpdatedBytes))
			version, err := semver.Parse(kopsVersionUpdated)
			if err != nil {
				return fmt.Errorf("error parsing last kops version updated: %v", err)
			}
			if version.GT(semver.MustParse(kopsbase.Version)) {
				fmt.Printf("\n")
				fmt.Printf("%s\n", starline)
				fmt.Printf("\n")
				fmt.Printf("The cluster was last updated by kops version %s\n", kopsVersionUpdated)
				fmt.Printf("To permit updating by the older version %s, run with the --allow-kops-downgrade flag\n", kopsbase.Version)
				fmt.Printf("\n")
				fmt.Printf("%s\n", starline)
				fmt.Printf("\n")
				return fmt.Errorf("kops version older than last used to update the cluster")
			}
		} else if err != os.ErrNotExist {
			return fmt.Errorf("error reading last kops version used to update: %v", err)
		}
	}

	cloud := c.Cloud

	err = validation.DeepValidate(c.Cluster, c.InstanceGroups, true, cloud)
	if err != nil {
		return err
	}

	if cluster.Spec.KubernetesVersion == "" {
		return fmt.Errorf("KubernetesVersion not set")
	}
	if cluster.Spec.DNSZone == "" && !dns.IsGossipHostname(cluster.ObjectMeta.Name) {
		return fmt.Errorf("DNSZone not set")
	}

	l := &Loader{}
	l.Init()

	keyStore, err := c.Clientset.KeyStore(cluster)
	if err != nil {
		return err
	}

	sshCredentialStore, err := c.Clientset.SSHCredentialStore(cluster)
	if err != nil {
		return err
	}

	secretStore, err := c.Clientset.SecretStore(cluster)
	if err != nil {
		return err
	}

	addonsClient := c.Clientset.AddonsFor(cluster)
	addons, err := addonsClient.List()
	if err != nil {
		return fmt.Errorf("error fetching addons: %v", err)
	}

	// Normalize k8s version
	versionWithoutV := strings.TrimSpace(cluster.Spec.KubernetesVersion)
	versionWithoutV = strings.TrimPrefix(versionWithoutV, "v")
	if cluster.Spec.KubernetesVersion != versionWithoutV {
		klog.Warningf("Normalizing kubernetes version: %q -> %q", cluster.Spec.KubernetesVersion, versionWithoutV)
		cluster.Spec.KubernetesVersion = versionWithoutV
	}

	// check if we should recommend turning off anonymousAuth
	{
		// we do a check here because setting modifying the kubelet object messes with the output
		warn := false
		if cluster.Spec.Kubelet == nil {
			warn = true
		} else if cluster.Spec.Kubelet.AnonymousAuth == nil {
			warn = true
		}

		if warn {
			fmt.Println("")
			fmt.Printf("%s\n", starline)
			fmt.Println("")
			fmt.Println("Kubelet anonymousAuth is currently turned on. This allows RBAC escalation and remote code execution possibilities.")
			fmt.Println("It is highly recommended you turn it off by setting 'spec.kubelet.anonymousAuth' to 'false' via 'kops edit cluster'")
			fmt.Println("")
			fmt.Println("See https://kops.sigs.k8s.io/security/#kubelet-api")
			fmt.Println("")
			fmt.Printf("%s\n", starline)
			fmt.Println("")
		}
	}

	encryptionConfigSecretHash := ""
	if fi.BoolValue(c.Cluster.Spec.EncryptionConfig) {
		secret, err := secretStore.FindSecret("encryptionconfig")
		if err != nil {
			return fmt.Errorf("could not load encryptionconfig secret: %v", err)
		}
		if secret == nil {
			fmt.Println("")
			fmt.Println("You have encryptionConfig enabled, but no encryptionconfig secret has been set.")
			fmt.Println("See `kops create secret encryptionconfig -h` and https://kubernetes.io/docs/tasks/administer-cluster/encrypt-data/")
			return fmt.Errorf("could not find encryptionconfig secret")
		}
		hashBytes := sha256.Sum256(secret.Data)
		encryptionConfigSecretHash = base64.URLEncoding.EncodeToString(hashBytes[:])
	}

	ciliumSpec := c.Cluster.Spec.Networking.Cilium
	if ciliumSpec != nil && ciliumSpec.EnableEncryption && ciliumSpec.EncryptionType == kops.CiliumEncryptionTypeIPSec {
		secret, err := secretStore.FindSecret("ciliumpassword")
		if err != nil {
			return fmt.Errorf("could not load the ciliumpassword secret: %w", err)
		}
		if secret == nil {
			fmt.Println("")
			fmt.Println("You have cilium encryption enabled, but no ciliumpassword secret has been set.")
			fmt.Println("See `kops create secret ciliumpassword -h`")
			return fmt.Errorf("could not find ciliumpassword secret")
		}
	}

	if err := c.addFileAssets(assetBuilder); err != nil {
		return err
	}

	checkExisting := true

	project := ""

	var sshPublicKeys [][]byte
	{
		keys, err := sshCredentialStore.FindSSHPublicKeys()
		if err != nil {
			return fmt.Errorf("error retrieving SSH public key %q: %v", fi.SecretNameSSHPrimary, err)
		}

		for _, k := range keys {
			sshPublicKeys = append(sshPublicKeys, []byte(k.Spec.PublicKey))
		}
	}

	modelContext := &model.KopsModelContext{
		IAMModelContext: iam.IAMModelContext{Cluster: cluster},
		InstanceGroups:  c.InstanceGroups,
	}

	switch kops.CloudProviderID(cluster.Spec.CloudProvider) {
	case kops.CloudProviderGCE:
		{
			gceCloud := cloud.(gce.GCECloud)
			project = gceCloud.Project()

			if !featureflag.AlphaAllowGCE.Enabled() {
				return fmt.Errorf("GCE support is currently alpha, and is feature-gated.  export KOPS_FEATURE_FLAGS=AlphaAllowGCE")
			}

		}

	case kops.CloudProviderDO:
		{
			if len(sshPublicKeys) == 0 && (c.Cluster.Spec.SSHKeyName == nil || *c.Cluster.Spec.SSHKeyName == "") {
				return fmt.Errorf("SSH public key must be specified when running with DigitalOcean (create with `kops create secret --name %s sshpublickey admin -i ~/.ssh/id_rsa.pub`)", cluster.ObjectMeta.Name)
			}
		}
	case kops.CloudProviderAWS:
		{
			awsCloud := cloud.(awsup.AWSCloud)

			accountID, partition, err := awsCloud.AccountInfo()
			if err != nil {
				return err
			}
			modelContext.AWSAccountID = accountID
			modelContext.AWSPartition = partition

			if len(sshPublicKeys) > 1 {
				return fmt.Errorf("exactly one 'admin' SSH public key can be specified when running with AWS; please delete a key using `kops delete secret`")
			}
		}

	case kops.CloudProviderAzure:
		{
			if !featureflag.Azure.Enabled() {
				return fmt.Errorf("azure support is currently alpha, and is feature-gated. Please export KOPS_FEATURE_FLAGS=Azure")
			}

			if len(sshPublicKeys) == 0 {
				return fmt.Errorf("SSH public key must be specified when running with AzureCloud (create with `kops create secret --name %s sshpublickey admin -i ~/.ssh/id_rsa.pub`)", cluster.ObjectMeta.Name)
			}

			if len(sshPublicKeys) != 1 {
				return fmt.Errorf("exactly one 'admin' SSH public key can be specified when running with AzureCloud; please delete a key using `kops delete secret`")
			}
		}
	case kops.CloudProviderOpenstack:
		{
			if len(sshPublicKeys) == 0 {
				return fmt.Errorf("SSH public key must be specified when running with Openstack (create with `kops create secret --name %s sshpublickey admin -i ~/.ssh/id_rsa.pub`)", cluster.ObjectMeta.Name)
			}

			if len(sshPublicKeys) != 1 {
				return fmt.Errorf("exactly one 'admin' SSH public key can be specified when running with Openstack; please delete a key using `kops delete secret`")
			}
		}
	default:
		return fmt.Errorf("unknown CloudProvider %q", cluster.Spec.CloudProvider)
	}

	modelContext.SSHPublicKeys = sshPublicKeys
	modelContext.Region = cloud.Region()

	if dns.IsGossipHostname(cluster.ObjectMeta.Name) {
		klog.Infof("Gossip DNS: skipping DNS validation")
	} else {
		err = validateDNS(cluster, cloud)
		if err != nil {
			return err
		}
	}

	tf := &TemplateFunctions{
		KopsModelContext: *modelContext,
		cloud:            cloud,
	}

	configBuilder, err := newNodeUpConfigBuilder(cluster, assetBuilder, c.Assets, encryptionConfigSecretHash)
	if err != nil {
		return err
	}
	bootstrapScriptBuilder := &model.BootstrapScriptBuilder{
		KopsModelContext:    modelContext,
		Lifecycle:           clusterLifecycle,
		NodeUpConfigBuilder: configBuilder,
		NodeUpAssets:        c.NodeUpAssets,
		Cluster:             cluster,
	}

	{
		templates, err := templates.LoadTemplates(cluster, models.NewAssetPath("cloudup/resources"))
		if err != nil {
			return fmt.Errorf("error loading templates: %v", err)
		}

		err = tf.AddTo(templates.TemplateFunctions, secretStore)
		if err != nil {
			return err
		}

		bcb := bootstrapchannelbuilder.NewBootstrapChannelBuilder(
			modelContext,
			clusterLifecycle,
			assetBuilder,
			templates,
			addons,
		)

		l.Builders = append(l.Builders,

			bcb,
			&model.PKIModelBuilder{
				KopsModelContext: modelContext,
				Lifecycle:        clusterLifecycle,
			},
			&model.IssuerDiscoveryModelBuilder{
				KopsModelContext: modelContext,
				Lifecycle:        clusterLifecycle,
				Cluster:          cluster,
			},
			&kubeapiserver.KubeApiserverBuilder{
				AssetBuilder:     assetBuilder,
				KopsModelContext: modelContext,
				Lifecycle:        clusterLifecycle,
			},
			&etcdmanager.EtcdManagerBuilder{
				AssetBuilder:     assetBuilder,
				KopsModelContext: modelContext,
				Lifecycle:        clusterLifecycle,
			},
			&model.MasterVolumeBuilder{KopsModelContext: modelContext, Lifecycle: clusterLifecycle},
			&model.ConfigBuilder{KopsModelContext: modelContext, Lifecycle: clusterLifecycle},
		)

		switch kops.CloudProviderID(cluster.Spec.CloudProvider) {
		case kops.CloudProviderAWS:
			awsModelContext := &awsmodel.AWSModelContext{
				KopsModelContext: modelContext,
			}

			l.Builders = append(l.Builders,
				&awsmodel.APILoadBalancerBuilder{AWSModelContext: awsModelContext, Lifecycle: clusterLifecycle, SecurityLifecycle: securityLifecycle},
				&awsmodel.BastionModelBuilder{AWSModelContext: awsModelContext, Lifecycle: clusterLifecycle, SecurityLifecycle: securityLifecycle},
				&awsmodel.DNSModelBuilder{AWSModelContext: awsModelContext, Lifecycle: clusterLifecycle},
				&awsmodel.ExternalAccessModelBuilder{AWSModelContext: awsModelContext, Lifecycle: securityLifecycle},
				&awsmodel.FirewallModelBuilder{AWSModelContext: awsModelContext, Lifecycle: securityLifecycle},
				&awsmodel.SSHKeyModelBuilder{AWSModelContext: awsModelContext, Lifecycle: securityLifecycle},
				&awsmodel.NetworkModelBuilder{AWSModelContext: awsModelContext, Lifecycle: networkLifecycle},
				&awsmodel.IAMModelBuilder{AWSModelContext: awsModelContext, Lifecycle: securityLifecycle, Cluster: cluster},
				&awsmodel.OIDCProviderBuilder{AWSModelContext: awsModelContext, Lifecycle: securityLifecycle, KeyStore: keyStore},
			)

			awsModelBuilder := &awsmodel.AutoscalingGroupModelBuilder{
				AWSModelContext:        awsModelContext,
				BootstrapScriptBuilder: bootstrapScriptBuilder,
				Lifecycle:              clusterLifecycle,
				SecurityLifecycle:      securityLifecycle,
				Cluster:                cluster,
			}

			if featureflag.Spotinst.Enabled() {
				l.Builders = append(l.Builders, &awsmodel.SpotInstanceGroupModelBuilder{
					AWSModelContext:        awsModelContext,
					BootstrapScriptBuilder: bootstrapScriptBuilder,
					Lifecycle:              clusterLifecycle,
					SecurityLifecycle:      securityLifecycle,
				})

				if featureflag.SpotinstHybrid.Enabled() {
					l.Builders = append(l.Builders, awsModelBuilder)
				}
			} else {
				l.Builders = append(l.Builders, awsModelBuilder)
			}

			nth := c.Cluster.Spec.NodeTerminationHandler
			if nth != nil && fi.BoolValue(nth.Enabled) && fi.BoolValue(nth.EnableSQSTerminationDraining) {
				l.Builders = append(l.Builders, &awsmodel.NodeTerminationHandlerBuilder{
					AWSModelContext: awsModelContext,
					Lifecycle:       clusterLifecycle,
				})
			}

		case kops.CloudProviderDO:
			doModelContext := &domodel.DOModelContext{
				KopsModelContext: modelContext,
			}
			l.Builders = append(l.Builders,
				&domodel.APILoadBalancerModelBuilder{DOModelContext: doModelContext, Lifecycle: securityLifecycle},
				&domodel.DropletBuilder{DOModelContext: doModelContext, BootstrapScriptBuilder: bootstrapScriptBuilder, Lifecycle: clusterLifecycle},
			)
		case kops.CloudProviderGCE:
			gceModelContext := &gcemodel.GCEModelContext{
				ProjectID:        project,
				KopsModelContext: modelContext,
			}

			storageACLLifecycle := securityLifecycle
			if storageACLLifecycle != fi.LifecycleIgnore {
				// This is a best-effort permissions fix
				storageACLLifecycle = fi.LifecycleWarnIfInsufficientAccess
			}

			l.Builders = append(l.Builders,

				&gcemodel.APILoadBalancerBuilder{GCEModelContext: gceModelContext, Lifecycle: securityLifecycle},
				&gcemodel.ExternalAccessModelBuilder{GCEModelContext: gceModelContext, Lifecycle: securityLifecycle},
				&gcemodel.FirewallModelBuilder{GCEModelContext: gceModelContext, Lifecycle: securityLifecycle},
				&gcemodel.NetworkModelBuilder{GCEModelContext: gceModelContext, Lifecycle: networkLifecycle},
				&gcemodel.StorageAclBuilder{GCEModelContext: gceModelContext, Cloud: cloud.(gce.GCECloud), Lifecycle: storageACLLifecycle},
				&gcemodel.AutoscalingGroupModelBuilder{GCEModelContext: gceModelContext, BootstrapScriptBuilder: bootstrapScriptBuilder, Lifecycle: clusterLifecycle},
				&gcemodel.ServiceAccountsBuilder{GCEModelContext: gceModelContext, Lifecycle: clusterLifecycle},
			)
		case kops.CloudProviderAzure:
			azureModelContext := &azuremodel.AzureModelContext{
				KopsModelContext: modelContext,
			}
			l.Builders = append(l.Builders,
				&azuremodel.APILoadBalancerModelBuilder{AzureModelContext: azureModelContext, Lifecycle: clusterLifecycle},
				&azuremodel.NetworkModelBuilder{AzureModelContext: azureModelContext, Lifecycle: clusterLifecycle},
				&azuremodel.ResourceGroupModelBuilder{AzureModelContext: azureModelContext, Lifecycle: clusterLifecycle},

				&azuremodel.VMScaleSetModelBuilder{AzureModelContext: azureModelContext, BootstrapScriptBuilder: bootstrapScriptBuilder, Lifecycle: clusterLifecycle},
			)
		case kops.CloudProviderOpenstack:
			openstackModelContext := &openstackmodel.OpenstackModelContext{
				KopsModelContext: modelContext,
			}

			l.Builders = append(l.Builders,
				&openstackmodel.NetworkModelBuilder{OpenstackModelContext: openstackModelContext, Lifecycle: networkLifecycle},
				&openstackmodel.SSHKeyModelBuilder{OpenstackModelContext: openstackModelContext, Lifecycle: securityLifecycle},
				&openstackmodel.FirewallModelBuilder{OpenstackModelContext: openstackModelContext, Lifecycle: securityLifecycle},
				&openstackmodel.ServerGroupModelBuilder{OpenstackModelContext: openstackModelContext, BootstrapScriptBuilder: bootstrapScriptBuilder, Lifecycle: clusterLifecycle},
			)

		default:
			return fmt.Errorf("unknown cloudprovider %q", cluster.Spec.CloudProvider)
		}
	}
	c.TaskMap, err = l.BuildTasks(c.LifecycleOverrides)
	if err != nil {
		return fmt.Errorf("error building tasks: %v", err)
	}

	var target fi.Target
	shouldPrecreateDNS := true

	switch c.TargetName {
	case TargetDirect:
		switch kops.CloudProviderID(cluster.Spec.CloudProvider) {
		case kops.CloudProviderGCE:
			target = gce.NewGCEAPITarget(cloud.(gce.GCECloud))
		case kops.CloudProviderAWS:
			target = awsup.NewAWSAPITarget(cloud.(awsup.AWSCloud))
		case kops.CloudProviderDO:
			target = do.NewDOAPITarget(cloud.(do.DOCloud))
		case kops.CloudProviderOpenstack:
			target = openstack.NewOpenstackAPITarget(cloud.(openstack.OpenstackCloud))
		case kops.CloudProviderAzure:
			target = azure.NewAzureAPITarget(cloud.(azure.AzureCloud))
		default:
			return fmt.Errorf("direct configuration not supported with CloudProvider:%q", cluster.Spec.CloudProvider)
		}

	case TargetTerraform:
		checkExisting = false
		outDir := c.OutDir
		var vfsProvider *vfs.TerraformProvider
		if tfPath, ok := configBase.(vfs.TerraformPath); ok && featureflag.TerraformManagedFiles.Enabled() {
			var err error
			vfsProvider, err = tfPath.TerraformProvider()
			if err != nil {
				return err
			}
		}
		tf := terraform.NewTerraformTarget(cloud, project, vfsProvider, outDir, cluster.Spec.Target)

		// We include a few "util" variables in the TF output
		if err := tf.AddOutputVariable("region", terraformWriter.LiteralFromStringValue(cloud.Region())); err != nil {
			return err
		}

		if project != "" {
			if err := tf.AddOutputVariable("project", terraformWriter.LiteralFromStringValue(project)); err != nil {
				return err
			}
		}

		if err := tf.AddOutputVariable("cluster_name", terraformWriter.LiteralFromStringValue(cluster.ObjectMeta.Name)); err != nil {
			return err
		}

		target = tf

		// Can cause conflicts with terraform management
		shouldPrecreateDNS = false

	case TargetCloudformation:
		checkExisting = false
		outDir := c.OutDir
		target = cloudformation.NewCloudformationTarget(cloud, project, outDir)

		// Can cause conflicts with cloudformation management
		shouldPrecreateDNS = false

		fmt.Printf("\n")
		fmt.Printf("%s\n", starline)
		fmt.Printf("\n")
		fmt.Printf("Kops support for CloudFormation is deprecated and will be removed in a future release.\n")
		fmt.Printf("\n")
		fmt.Printf("%s\n", starline)
		fmt.Printf("\n")

	case TargetDryRun:
		var out io.Writer = os.Stdout
		if c.GetAssets {
			out = io.Discard
		}
		target = fi.NewDryRunTarget(assetBuilder, out)

		// Avoid making changes on a dry-run
		shouldPrecreateDNS = false

	default:
		return fmt.Errorf("unsupported target type %q", c.TargetName)
	}
	c.Target = target

	if checkExisting {
		c.TaskMap, err = l.FindDeletions(cloud, c.LifecycleOverrides)
		if err != nil {
			return fmt.Errorf("error finding deletions: %w", err)
		}
	}

	context, err := fi.NewContext(target, cluster, cloud, keyStore, secretStore, configBase, checkExisting, c.TaskMap)
	if err != nil {
		return fmt.Errorf("error building context: %v", err)
	}
	defer context.Close()

	var options fi.RunTasksOptions
	if c.RunTasksOptions != nil {
		options = *c.RunTasksOptions
	} else {
		options.InitDefaults()
	}

	err = context.RunTasks(options)
	if err != nil {
		return fmt.Errorf("error running tasks: %v", err)
	}

	if dns.IsGossipHostname(cluster.Name) {
		shouldPrecreateDNS = false
	}

	if shouldPrecreateDNS && clusterLifecycle != fi.LifecycleIgnore {
		if err := precreateDNS(ctx, cluster, cloud); err != nil {
			klog.Warningf("unable to pre-create DNS records - cluster startup may be slower: %v", err)
		}
	}

	err = target.Finish(c.TaskMap) // This will finish the apply, and print the changes
	if err != nil {
		return fmt.Errorf("error closing target: %v", err)
	}

	c.ImageAssets = assetBuilder.ImageAssets
	c.FileAssets = assetBuilder.FileAssets

	return nil
}