virtualcluster/nodes_create.go

// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

package virtualcluster

import (
	"context"
	"fmt"
	"time"

	"github.com/Azure/kperf/helmcli"
	"github.com/Azure/kperf/manifests"
)

// CreateNodepool creates a new node pool.
//
// TODO:
// 1. Create a new package to define ErrNotFound, ErrAlreadyExists, ... errors.
// 2. Support a configurable timeout.
//
// FIXME:
//
// Some cloud providers delete unknown or not-ready nodes. If we rendered both
// nodes and controllers in one helm release, helm would not wait for the
// controllers to become ready before creating the nodes, and the nodes would
// be deleted by the cloud provider. Helm's post-install or post-upgrade hooks
// can ensure that nodes are not deployed until the controllers are ready, but
// resources created by helm hooks are not part of the helm release, so we
// would need an extra step to clean up the node resources when deleting the
// nodepool's helm release. Because of this, we split one helm release into
// two: one for the controllers and one for the nodes.
//
// However, this is not a guarantee. If the controller is deleted and takes a
// long time to restart, the nodes will be marked NotReady and deleted by the
// cloud provider. We may consider contributing a workaround to the different
// cloud providers, for example: if node.Spec.ProviderID contains
// `?ignore=virtual`, the cloud provider should ignore that kind of node.
func CreateNodepool(ctx context.Context, kubeCfgPath string, nodepoolName string, opts ...NodepoolOpt) (retErr error) {
	cfg := defaultNodepoolCfg
	for _, opt := range opts {
		opt(&cfg)
	}
	cfg.name = nodepoolName

	if err := cfg.validate(); err != nil {
		return err
	}

	getCli, err := helmcli.NewGetCli(kubeCfgPath, virtualnodeReleaseNamespace)
	if err != nil {
		return fmt.Errorf("failed to create helm get client: %w", err)
	}

	// Fail fast if a node release with the same name already exists.
	_, err = getCli.Get(cfg.nodeHelmReleaseName())
	if err == nil {
		return fmt.Errorf("nodepool %s already exists", cfg.nodeHelmReleaseName())
	}

	// Deploy the controllers first (see the FIXME above) and keep the
	// returned cleanup function so a failed node deployment does not
	// leave the controller release behind.
	cleanupFn, err := createNodepoolController(ctx, kubeCfgPath, &cfg)
	if err != nil {
		return err
	}
	defer func() {
		// NOTE: Best-effort cleanup. If resources leak after a forced
		// stop (e.g. the process is killed), they must be cleaned up
		// manually.
		if retErr != nil {
			_ = cleanupFn()
		}
	}()

	ch, err := manifests.LoadChart(virtualnodeChartName)
	if err != nil {
		return fmt.Errorf("failed to load virtual node chart: %w", err)
	}

	valueAppliers, err := cfg.toNodeHelmValuesAppliers()
	if err != nil {
		return err
	}

	releaseCli, err := helmcli.NewReleaseCli(
		kubeCfgPath,
		virtualnodeReleaseNamespace,
		cfg.nodeHelmReleaseName(),
		ch,
		virtualnodeReleaseLabels,
		valueAppliers...,
	)
	if err != nil {
		return fmt.Errorf("failed to create helm release client: %w", err)
	}
	return releaseCli.Deploy(ctx, 30*time.Minute)
}
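
// Example (hypothetical): a minimal sketch of how a caller outside this
// package might create a node pool with the default configuration. The
// kubeconfig path and pool name below are placeholders; no NodepoolOpt values
// are passed, so the defaults apply. Note that the helm deploys above use
// 30-minute timeouts internally, so callers may want a context with a
// generous deadline.
//
//	ctx := context.Background()
//	if err := virtualcluster.CreateNodepool(ctx, "/path/to/kubeconfig", "demo-pool"); err != nil {
//		log.Fatalf("create nodepool: %v", err)
//	}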

// createNodepoolController deploys the node controller helm release and
// returns a cleanup function that uninstalls it.
func createNodepoolController(ctx context.Context, kubeCfgPath string, cfg *nodepoolConfig) (_cleanup func() error, _ error) {
	ch, err := manifests.LoadChart(virtualnodeControllerChartName)
	if err != nil {
		return nil, fmt.Errorf("failed to load virtual node controller chart: %w", err)
	}

	appliers, err := cfg.toNodeControllerHelmValuesAppliers()
	if err != nil {
		return nil, err
	}

	releaseCli, err := helmcli.NewReleaseCli(
		kubeCfgPath,
		virtualnodeReleaseNamespace,
		cfg.nodeControllerHelmReleaseName(),
		ch,
		virtualnodeReleaseLabels,
		appliers...,
	)
	if err != nil {
		return nil, fmt.Errorf("failed to create helm release client: %w", err)
	}

	if err := releaseCli.Deploy(ctx, 30*time.Minute); err != nil {
		return nil, fmt.Errorf("failed to deploy virtual node controller: %w", err)
	}
	return releaseCli.Uninstall, nil
}
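
// The FIXME above suggests that cloud providers could skip virtual nodes whose
// ProviderID carries an ignore marker. A hypothetical sketch of such a check
// on the provider side (the function name and where it would live are
// assumptions, not an existing API; corev1 is k8s.io/api/core/v1):
//
//	// shouldIgnoreVirtualNode reports whether a node lifecycle controller
//	// should leave the node alone instead of deleting it as unknown or
//	// not-ready.
//	func shouldIgnoreVirtualNode(node *corev1.Node) bool {
//		return strings.Contains(node.Spec.ProviderID, "?ignore=virtual")
//	}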