v2/api/containerservice/customizations/managed_cluster_extensions.go (175 lines of code) (raw):

/* * Copyright (c) Microsoft Corporation. * Licensed under the MIT license. */ package customizations import ( "context" "fmt" "strings" . "github.com/Azure/azure-service-operator/v2/internal/logging" "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice" "github.com/go-logr/logr" "github.com/rotisserie/eris" v1 "k8s.io/api/core/v1" "sigs.k8s.io/controller-runtime/pkg/conversion" containerservice "github.com/Azure/azure-service-operator/v2/api/containerservice/v1api20240901/storage" "github.com/Azure/azure-service-operator/v2/internal/genericarmclient" "github.com/Azure/azure-service-operator/v2/internal/resolver" "github.com/Azure/azure-service-operator/v2/internal/set" "github.com/Azure/azure-service-operator/v2/pkg/genruntime" "github.com/Azure/azure-service-operator/v2/pkg/genruntime/core" "github.com/Azure/azure-service-operator/v2/pkg/genruntime/extensions" "github.com/Azure/azure-service-operator/v2/pkg/genruntime/secrets" ) const ( adminCredentialsKey = "adminCredentials" userCredentialsKey = "userCredentials" ) var _ genruntime.KubernetesSecretExporter = &ManagedClusterExtension{} func (ext *ManagedClusterExtension) ExportKubernetesSecrets( ctx context.Context, obj genruntime.MetaObject, additionalSecrets set.Set[string], armClient *genericarmclient.GenericClient, log logr.Logger, ) (*genruntime.KubernetesSecretExportResult, error) { // This has to be the current hub storage version. It will need to be updated // if the hub storage version changes. typedObj, ok := obj.(*containerservice.ManagedCluster) if !ok { return nil, eris.Errorf("cannot run on unknown resource type %T, expected *containerservice.ManagedCluster", obj) } // Type assert that we are the hub type. This will fail to compile if // the hub type has been changed but this extension has not var _ conversion.Hub = typedObj primarySecrets := secretsSpecified(typedObj) requestedSecrets := set.Union(primarySecrets, additionalSecrets) if len(requestedSecrets) == 0 { log.V(Debug).Info("No secrets retrieval to perform as operatorSpec is empty") return nil, nil } id, err := genruntime.GetAndParseResourceID(typedObj) if err != nil { return nil, err } subscription := id.SubscriptionID // Using armClient.ClientOptions() here ensures we share the same HTTP connection, so this is not opening a new // connection each time through var mcClient *armcontainerservice.ManagedClustersClient mcClient, err = armcontainerservice.NewManagedClustersClient(subscription, armClient.Creds(), armClient.ClientOptions()) if err != nil { return nil, eris.Wrapf(err, "failed to create new ManagedClustersClient") } // TODO: In the future we may need variants of these secret properties that configure usage of the public FQDN rather than the private one, see: // TODO: https://docs.microsoft.com/en-us/answers/questions/670332/azure-aks-get-credentials-using-wrong-hostname-for.html var adminCredentials string if requestedSecrets.Contains(adminCredentialsKey) { var resp armcontainerservice.ManagedClustersClientListClusterAdminCredentialsResponse resp, err = mcClient.ListClusterAdminCredentials(ctx, id.ResourceGroupName, typedObj.AzureName(), nil) if err != nil { return nil, eris.Wrapf(err, "failed listing admin credentials") } if len(resp.Kubeconfigs) > 0 { // It's awkward that we're ignoring the other possible responses here, but that's what the AZ CLI does too: // https://github.com/Azure/azure-cli/blob/6786b5014ae71eb6d93f95e1ad123e9171368e8f/src/azure-cli/azure/cli/command_modules/acs/custom.py#L2166 adminCredentials = string(resp.CredentialResults.Kubeconfigs[0].Value) } } var userCredentials string if requestedSecrets.Contains(userCredentialsKey) { var resp armcontainerservice.ManagedClustersClientListClusterUserCredentialsResponse resp, err = mcClient.ListClusterUserCredentials(ctx, id.ResourceGroupName, typedObj.AzureName(), nil) if err != nil { return nil, eris.Wrapf(err, "failed listing admin credentials") } if len(resp.Kubeconfigs) > 0 { // It's awkward that we're ignoring the other possible responses here, but that's what the AZ CLI does too: // https://github.com/Azure/azure-cli/blob/6786b5014ae71eb6d93f95e1ad123e9171368e8f/src/azure-cli/azure/cli/command_modules/acs/custom.py#L2166 userCredentials = string(resp.CredentialResults.Kubeconfigs[0].Value) } } secretSlice, err := secretsToWrite(typedObj, adminCredentials, userCredentials) if err != nil { return nil, err } resolvedSecrets := map[string]string{} if adminCredentials != "" { resolvedSecrets[adminCredentialsKey] = adminCredentials } if userCredentials != "" { resolvedSecrets[userCredentialsKey] = userCredentials } return &genruntime.KubernetesSecretExportResult{ Objs: secrets.SliceToClientObjectSlice(secretSlice), RawSecrets: secrets.SelectSecrets(additionalSecrets, resolvedSecrets), }, nil } func secretsSpecified(obj *containerservice.ManagedCluster) set.Set[string] { if obj.Spec.OperatorSpec == nil || obj.Spec.OperatorSpec.Secrets == nil { return nil } secrets := obj.Spec.OperatorSpec.Secrets result := set.Set[string]{} if secrets.AdminCredentials != nil { result.Add(adminCredentialsKey) } if secrets.UserCredentials != nil { result.Add(userCredentialsKey) } return result } func secretsToWrite(obj *containerservice.ManagedCluster, adminCreds string, userCreds string) ([]*v1.Secret, error) { operatorSpecSecrets := obj.Spec.OperatorSpec.Secrets if operatorSpecSecrets == nil { return nil, nil } collector := secrets.NewCollector(obj.Namespace) collector.AddValue(operatorSpecSecrets.AdminCredentials, adminCreds) collector.AddValue(operatorSpecSecrets.UserCredentials, userCreds) return collector.Values() } var _ extensions.PreReconciliationChecker = &ManagedClusterExtension{} // If a managed cluster has a provisioningState not in this set, it will reject any attempt to PUT a new state out of // hand; so there's no point in even trying. This is true even if the PUT we're doing will have no effect on the state // of the cluster. // These are all listed lowercase, so we can do a case-insensitive match. var nonBlockingManagedClusterProvisioningStates = set.Make( "succeeded", "failed", "canceled", ) func (ext *ManagedClusterExtension) PreReconcileCheck( ctx context.Context, obj genruntime.MetaObject, owner genruntime.MetaObject, resourceResolver *resolver.Resolver, armClient *genericarmclient.GenericClient, log logr.Logger, next extensions.PreReconcileCheckFunc, ) (extensions.PreReconcileCheckResult, error) { // This has to be the current hub storage version. It will need to be updated // if the hub storage version changes. managedCluster, ok := obj.(*containerservice.ManagedCluster) if !ok { return extensions.PreReconcileCheckResult{}, eris.Errorf("cannot run on unknown resource type %T, expected *containerservice.ManagedCluster", obj) } // Type assert that we are the hub type. This will fail to compile if // the hub type has been changed but this extension has not var _ conversion.Hub = managedCluster // If the cluster is in a state that will reject any PUT, then we should skip reconciliation // as there's no point in even trying. // This allows us to "play nice with others" and not use up request quota attempting to make changes when we // already know those attempts will fail. state := managedCluster.Status.ProvisioningState if state != nil && clusterProvisioningStateBlocksReconciliation(state) { return extensions.BlockReconcile( fmt.Sprintf("Managed cluster is in provisioning state %q", *state)), nil } return next(ctx, obj, owner, resourceResolver, armClient, log) } func clusterProvisioningStateBlocksReconciliation(provisioningState *string) bool { if provisioningState == nil { return false } return !nonBlockingManagedClusterProvisioningStates.Contains(strings.ToLower(*provisioningState)) } var _ extensions.ErrorClassifier = &ManagedClusterExtension{} // ClassifyError evaluates the provided error, returning including whether it is fatal or can be retried. // cloudError is the error returned from ARM. // apiVersion is the ARM API version used for the request. // log is a logger than can be used for telemetry. // next is the next implementation to call. func (ext *ManagedClusterExtension) ClassifyError( cloudError *genericarmclient.CloudError, apiVersion string, log logr.Logger, next extensions.ErrorClassifierFunc, ) (core.CloudErrorDetails, error) { details, err := next(cloudError) if err != nil { return core.CloudErrorDetails{}, err } if isRetryableClusterError(cloudError) { details.Classification = core.ErrorRetryable } return details, nil } func isRetryableClusterError(err *genericarmclient.CloudError) bool { if err == nil { return false } // A CustomKubeletIdentityMissingPermissionError can occur if the user-assigned identity required by the cluster // hasn't yet been provisioned; we want to retry so that we finish provisioning the cluster once it is available. if err.Code() == "CustomKubeletIdentityMissingPermissionError" { return true } return false }