cluster-autoscaler/cloudprovider/azure/azure_config.go (406 lines of code) (raw):
/*
Copyright 2020 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package azure
import (
"encoding/json"
"fmt"
"io"
"io/ioutil"
"os"
"strconv"
"strings"
"time"
"github.com/Azure/go-autorest/autorest"
"github.com/Azure/go-autorest/autorest/azure"
"k8s.io/klog/v2"
azclients "sigs.k8s.io/cloud-provider-azure/pkg/azureclients"
providerazureconsts "sigs.k8s.io/cloud-provider-azure/pkg/consts"
providerazure "sigs.k8s.io/cloud-provider-azure/pkg/provider"
providerazureconfig "sigs.k8s.io/cloud-provider-azure/pkg/provider/config"
"sigs.k8s.io/cloud-provider-azure/pkg/retry"
)
const (
// The path of deployment parameters for standard vm.
deploymentParametersPath = "/var/lib/azure/azuredeploy.parameters.json"
imdsServerURL = "http://169.254.169.254"
// auth methods
authMethodPrincipal = "principal"
authMethodCLI = "cli"
)
// Config holds the configuration parsed from the --cloud-config flag or the environment variables.
// Contains both general Azure cloud provider configuration (i.e., in azure.json) and CAS configurations/options specifically for Azure provider.
type Config struct {
// Azure cloud provider configuration, which is generally shared with other Azure components.
providerazure.Config `json:",inline" yaml:",inline"`
// Legacy fields, which are only here for backward compatibility. To be deprecated.
legacyConfig `json:",inline" yaml:",inline"`
ClusterName string `json:"clusterName" yaml:"clusterName"`
// ClusterResourceGroup is the resource group where the cluster is located.
ClusterResourceGroup string `json:"clusterResourceGroup" yaml:"clusterResourceGroup"`
// ARMBaseURLForAPClient is the URL to use for operations for the VMs pool.
// It can override the default public ARM endpoint for VMs pool scale operations.
ARMBaseURLForAPClient string `json:"armBaseURLForAPClient" yaml:"armBaseURLForAPClient"`
// AuthMethod determines how to authorize requests for the Azure
// cloud. Valid options are "principal" (= the traditional
// service principle approach) and "cli" (= load az command line
// config file). The default is "principal".
// 08/16/2024: This field is awkward, given the existence of UseManagedIdentityExtension and UseFederatedWorkloadIdentityExtension.
// Ideally, either it should be deprecated, or reworked to be on the same "dimension" as the two above, if not reworking those two.
AuthMethod string `json:"authMethod" yaml:"authMethod"`
// Configs only for standard vmType (agent pools).
Deployment string `json:"deployment" yaml:"deployment"`
DeploymentParameters map[string]interface{} `json:"deploymentParameters" yaml:"deploymentParameters"`
// Jitter in seconds subtracted from the VMSS cache TTL before the first refresh
VmssVmsCacheJitter int `json:"vmssVmsCacheJitter" yaml:"vmssVmsCacheJitter"`
// number of latest deployments that will not be deleted
MaxDeploymentsCount int64 `json:"maxDeploymentsCount" yaml:"maxDeploymentsCount"`
// EnableForceDelete defines whether to enable force deletion on the APIs
EnableForceDelete bool `json:"enableForceDelete,omitempty" yaml:"enableForceDelete,omitempty"`
// (DEPRECATED, DO NOT USE) EnableDynamicInstanceList defines whether to enable dynamic instance workflow for instance information check
EnableDynamicInstanceList bool `json:"enableDynamicInstanceList,omitempty" yaml:"enableDynamicInstanceList,omitempty"`
// (DEPRECATED, DO NOT USE) EnableDetailedCSEMessage defines whether to emit error messages in the CSE error body info
EnableDetailedCSEMessage bool `json:"enableDetailedCSEMessage,omitempty" yaml:"enableDetailedCSEMessage,omitempty"`
// (DEPRECATED, DO NOT USE) GetVmssSizeRefreshPeriod (seconds) defines how frequently to call GET VMSS API to fetch VMSS info per nodegroup instance
GetVmssSizeRefreshPeriod int `json:"getVmssSizeRefreshPeriod,omitempty" yaml:"getVmssSizeRefreshPeriod,omitempty"`
// StrictCacheUpdates updates cache values only after positive validation from Azure APIs
StrictCacheUpdates bool `json:"strictCacheUpdates,omitempty" yaml:"strictCacheUpdates,omitempty"`
// EnableFastDeleteOnFailedProvisioning defines whether to delete the experimental faster VMSS instance deletion on failed provisioning
EnableFastDeleteOnFailedProvisioning bool `json:"enableFastDeleteOnFailedProvisioning,omitempty" yaml:"enableFastDeleteOnFailedProvisioning,omitempty"`
}
// These are only here for backward compabitility. Their equivalent exists in providerazure.Config with a different name.
type legacyConfig struct {
// Being renamed to UseFederatedWorkloadIdentityExtension
UseWorkloadIdentityExtension *bool `json:"useWorkloadIdentityExtension" yaml:"useWorkloadIdentityExtension"`
// VMSS metadata cache TTL in seconds, only applies for vmss type; being renamed to VmssCacheTTLInSeconds
VmssCacheTTL *int64 `json:"vmssCacheTTL" yaml:"vmssCacheTTL"`
// VMSS instances cache TTL in seconds, only applies for vmss type; being renamed to VmssVirtualMachinesCacheTTLInSeconds
VmssVmsCacheTTL *int64 `json:"vmssVmsCacheTTL" yaml:"vmssVmsCacheTTL"`
// EnableVmssFlex defines whether to enable Vmss Flex support or not; being renamed to EnableVmssFlexNodes
EnableVmssFlex *bool `json:"enableVmssFlex,omitempty" yaml:"enableVmssFlex,omitempty"`
}
// BuildAzureConfig returns a Config object for the Azure clients
func BuildAzureConfig(configReader io.Reader) (*Config, error) {
var err error
cfg := &Config{}
// Static defaults
cfg.EnableDynamicInstanceList = false
cfg.EnableVmssFlexNodes = false
cfg.CloudProviderBackoffRetries = providerazureconsts.BackoffRetriesDefault
cfg.CloudProviderBackoffExponent = providerazureconsts.BackoffExponentDefault
cfg.CloudProviderBackoffDuration = providerazureconsts.BackoffDurationDefault
cfg.CloudProviderBackoffJitter = providerazureconsts.BackoffJitterDefault
cfg.VMType = providerazureconsts.VMTypeVMSS
cfg.MaxDeploymentsCount = int64(defaultMaxDeploymentsCount)
cfg.StrictCacheUpdates = false
// Config file overrides defaults
if configReader != nil {
body, err := ioutil.ReadAll(configReader)
if err != nil {
return nil, fmt.Errorf("failed to read config: %v", err)
}
err = json.Unmarshal(body, cfg)
if err != nil {
return nil, fmt.Errorf("failed to unmarshal config body: %v", err)
}
}
// Legacy config fields, take precedence if provided.
if cfg.UseWorkloadIdentityExtension != nil {
cfg.UseFederatedWorkloadIdentityExtension = *cfg.UseWorkloadIdentityExtension
}
if cfg.VmssCacheTTL != nil {
if *cfg.VmssCacheTTL > int64(^uint32(0)) {
return nil, fmt.Errorf("VmssCacheTTL value %d is too large", *cfg.VmssCacheTTL)
}
cfg.VmssCacheTTLInSeconds = int(*cfg.VmssCacheTTL)
}
if cfg.VmssVmsCacheTTL != nil {
if *cfg.VmssVmsCacheTTL > int64(^uint32(0)) {
return nil, fmt.Errorf("VmssVmsCacheTTL value %d is too large", *cfg.VmssVmsCacheTTL)
}
cfg.VmssVirtualMachinesCacheTTLInSeconds = int(*cfg.VmssVmsCacheTTL)
}
if cfg.EnableVmssFlex != nil {
cfg.EnableVmssFlexNodes = *cfg.EnableVmssFlex
}
// Each of these environment variables, if provided, will override what's in the config file.
// Note that this "retrieval from env" does not exist in cloud-provider-azure library (at the time of this comment).
if _, err = assignFromEnvIfExists(&cfg.ClusterName, "CLUSTER_NAME"); err != nil {
return nil, err
}
if _, err = assignFromEnvIfExists(&cfg.ClusterResourceGroup, "ARM_CLUSTER_RESOURCE_GROUP"); err != nil {
return nil, err
}
if _, err = assignFromEnvIfExists(&cfg.ARMBaseURLForAPClient, "ARM_BASE_URL_FOR_AP_CLIENT"); err != nil {
return nil, err
}
if _, err = assignFromEnvIfExists(&cfg.Cloud, "ARM_CLOUD"); err != nil {
return nil, err
}
if _, err = assignFromEnvIfExists(&cfg.Location, "LOCATION"); err != nil {
return nil, err
}
if _, err = assignFromEnvIfExists(&cfg.ResourceGroup, "ARM_RESOURCE_GROUP"); err != nil {
return nil, err
}
if _, err = assignFromEnvIfExists(&cfg.TenantID, "ARM_TENANT_ID"); err != nil {
return nil, err
}
if _, err = assignFromEnvIfExists(&cfg.TenantID, "AZURE_TENANT_ID"); err != nil { // taking precedence
return nil, err
}
if _, err = assignFromEnvIfExists(&cfg.AADClientID, "ARM_CLIENT_ID"); err != nil {
return nil, err
}
if _, err = assignFromEnvIfExists(&cfg.AADClientID, "AZURE_CLIENT_ID"); err != nil { // taking precedence
return nil, err
}
if _, err = assignFromEnvIfExists(&cfg.AADFederatedTokenFile, "AZURE_FEDERATED_TOKEN_FILE"); err != nil {
return nil, err
}
if _, err = assignFromEnvIfExists(&cfg.AADClientSecret, "ARM_CLIENT_SECRET"); err != nil {
return nil, err
}
if _, err = assignFromEnvIfExists(&cfg.VMType, "ARM_VM_TYPE"); err != nil {
return nil, err
}
if _, err = assignFromEnvIfExists(&cfg.AADClientCertPath, "ARM_CLIENT_CERT_PATH"); err != nil {
return nil, err
}
if _, err = assignFromEnvIfExists(&cfg.AADClientCertPassword, "ARM_CLIENT_CERT_PASSWORD"); err != nil {
return nil, err
}
if _, err = assignFromEnvIfExists(&cfg.Deployment, "ARM_DEPLOYMENT"); err != nil {
return nil, err
}
if _, err = assignFromEnvIfExists(&cfg.SubscriptionID, "ARM_SUBSCRIPTION_ID"); err != nil {
return nil, err
}
if _, err = assignBoolFromEnvIfExists(&cfg.UseManagedIdentityExtension, "ARM_USE_MANAGED_IDENTITY_EXTENSION"); err != nil {
return nil, err
}
if _, err = assignBoolFromEnvIfExists(&cfg.UseFederatedWorkloadIdentityExtension, "ARM_USE_FEDERATED_WORKLOAD_IDENTITY_EXTENSION"); err != nil {
return nil, err
}
if _, err = assignBoolFromEnvIfExists(&cfg.UseFederatedWorkloadIdentityExtension, "ARM_USE_WORKLOAD_IDENTITY_EXTENSION"); err != nil { // taking precedence
return nil, err
}
if _, err = assignFromEnvIfExists(&cfg.UserAssignedIdentityID, "ARM_USER_ASSIGNED_IDENTITY_ID"); err != nil {
return nil, err
}
if _, err = assignIntFromEnvIfExists(&cfg.VmssCacheTTLInSeconds, "AZURE_VMSS_CACHE_TTL_IN_SECONDS"); err != nil {
return nil, err
}
if _, err = assignIntFromEnvIfExists(&cfg.VmssCacheTTLInSeconds, "AZURE_VMSS_CACHE_TTL"); err != nil { // taking precedence
return nil, err
}
if _, err = assignIntFromEnvIfExists(&cfg.VmssVirtualMachinesCacheTTLInSeconds, "AZURE_VMSS_VMS_CACHE_TTL_IN_SECONDS"); err != nil {
return nil, err
}
if _, err = assignIntFromEnvIfExists(&cfg.VmssVirtualMachinesCacheTTLInSeconds, "AZURE_VMSS_VMS_CACHE_TTL"); err != nil { // taking precedence
return nil, err
}
if _, err = assignIntFromEnvIfExists(&cfg.VmssVmsCacheJitter, "AZURE_VMSS_VMS_CACHE_JITTER"); err != nil {
return nil, err
}
if _, err = assignIntFromEnvIfExists(&cfg.GetVmssSizeRefreshPeriod, "AZURE_GET_VMSS_SIZE_REFRESH_PERIOD"); err != nil {
return nil, err
}
if _, err = assignInt64FromEnvIfExists(&cfg.MaxDeploymentsCount, "AZURE_MAX_DEPLOYMENT_COUNT"); err != nil {
return nil, err
}
if _, err = assignBoolFromEnvIfExists(&cfg.CloudProviderBackoff, "ENABLE_BACKOFF"); err != nil {
return nil, err
}
if _, err = assignBoolFromEnvIfExists(&cfg.EnableForceDelete, "AZURE_ENABLE_FORCE_DELETE"); err != nil {
return nil, err
}
if _, err = assignBoolFromEnvIfExists(&cfg.StrictCacheUpdates, "AZURE_STRICT_CACHE_UPDATES"); err != nil {
return nil, err
}
if _, err = assignBoolFromEnvIfExists(&cfg.EnableDynamicInstanceList, "AZURE_ENABLE_DYNAMIC_INSTANCE_LIST"); err != nil {
return nil, err
}
if _, err = assignBoolFromEnvIfExists(&cfg.EnableDetailedCSEMessage, "AZURE_ENABLE_DETAILED_CSE_MESSAGE"); err != nil {
return nil, err
}
if _, err = assignBoolFromEnvIfExists(&cfg.EnableVmssFlexNodes, "AZURE_ENABLE_VMSS_FLEX_NODES"); err != nil {
return nil, err
}
if _, err = assignBoolFromEnvIfExists(&cfg.EnableVmssFlexNodes, "AZURE_ENABLE_VMSS_FLEX"); err != nil { // taking precedence
return nil, err
}
if cfg.CloudProviderBackoff {
if _, err = assignIntFromEnvIfExists(&cfg.CloudProviderBackoffRetries, "BACKOFF_RETRIES"); err != nil {
return nil, err
}
if _, err = assignFloat64FromEnvIfExists(&cfg.CloudProviderBackoffExponent, "BACKOFF_EXPONENT"); err != nil {
return nil, err
}
if _, err = assignIntFromEnvIfExists(&cfg.CloudProviderBackoffDuration, "BACKOFF_DURATION"); err != nil {
return nil, err
}
if _, err = assignFloat64FromEnvIfExists(&cfg.CloudProviderBackoffJitter, "BACKOFF_JITTER"); err != nil {
return nil, err
}
}
if _, err = assignBoolFromEnvIfExists(&cfg.CloudProviderRateLimit, "CLOUD_PROVIDER_RATE_LIMIT"); err != nil {
return nil, err
}
if _, err = assignFloat32FromEnvIfExists(&cfg.CloudProviderRateLimitQPS, "RATE_LIMIT_READ_QPS"); err != nil {
return nil, err
}
if _, err = assignIntFromEnvIfExists(&cfg.CloudProviderRateLimitBucket, "RATE_LIMIT_READ_BUCKETS"); err != nil {
return nil, err
}
if _, err = assignFloat32FromEnvIfExists(&cfg.CloudProviderRateLimitQPSWrite, "RATE_LIMIT_WRITE_QPS"); err != nil {
return nil, err
}
if _, err = assignIntFromEnvIfExists(&cfg.CloudProviderRateLimitBucketWrite, "RATE_LIMIT_WRITE_BUCKETS"); err != nil {
return nil, err
}
if _, err = assignBoolFromEnvIfExists(&cfg.EnableFastDeleteOnFailedProvisioning, "AZURE_ENABLE_FAST_DELETE_ON_FAILED_PROVISIONING"); err != nil {
return nil, err
}
// Nonstatic defaults
cfg.VMType = strings.ToLower(cfg.VMType)
if cfg.MaxDeploymentsCount == 0 {
// 0 means "use default" in this case.
// This means, if it is valued by the config file, but explicitly set to 0 in the env, it will retreat to default.
cfg.MaxDeploymentsCount = int64(defaultMaxDeploymentsCount)
}
if cfg.SubscriptionID == "" {
metadataService, err := providerazure.NewInstanceMetadataService(imdsServerURL)
if err != nil {
return nil, err
}
metadata, err := metadataService.GetMetadata(0)
if err != nil {
return nil, err
}
cfg.SubscriptionID = metadata.Compute.SubscriptionID
}
if cfg.VMType == providerazureconsts.VMTypeStandard && len(cfg.DeploymentParameters) == 0 {
// Read parameters from deploymentParametersPath if it is not set.
parameters, err := readDeploymentParameters(deploymentParametersPath)
if err != nil {
klog.Errorf("readDeploymentParameters failed with error: %v", err)
return nil, err
}
cfg.DeploymentParameters = parameters
}
providerazureconfig.InitializeCloudProviderRateLimitConfig(&cfg.CloudProviderRateLimitConfig)
if err := cfg.validate(); err != nil {
return nil, err
}
return cfg, nil
}
// A "fork" of az.getAzureClientConfig with BYO authorizer (e.g., for CLI auth) and custom polling delay support
func (cfg *Config) getAzureClientConfig(authorizer autorest.Authorizer, env *azure.Environment) *azclients.ClientConfig {
pollingDelay := 30 * time.Second
azClientConfig := &azclients.ClientConfig{
CloudName: cfg.Cloud,
Location: cfg.Location,
SubscriptionID: cfg.SubscriptionID,
ResourceManagerEndpoint: env.ResourceManagerEndpoint,
Authorizer: authorizer,
Backoff: &retry.Backoff{Steps: 1},
RestClientConfig: azclients.RestClientConfig{
PollingDelay: &pollingDelay,
},
DisableAzureStackCloud: cfg.DisableAzureStackCloud,
UserAgent: cfg.UserAgent,
}
if cfg.CloudProviderBackoff {
azClientConfig.Backoff = &retry.Backoff{
Steps: cfg.CloudProviderBackoffRetries,
Factor: cfg.CloudProviderBackoffExponent,
Duration: time.Duration(cfg.CloudProviderBackoffDuration) * time.Second,
Jitter: cfg.CloudProviderBackoffJitter,
}
}
if cfg.HasExtendedLocation() {
azClientConfig.ExtendedLocation = &azclients.ExtendedLocation{
Name: cfg.ExtendedLocationName,
Type: cfg.ExtendedLocationType,
}
}
return azClientConfig
}
func (cfg *Config) validate() error {
if cfg.ResourceGroup == "" {
return fmt.Errorf("resource group not set")
}
if cfg.VMType == providerazureconsts.VMTypeStandard {
if cfg.Deployment == "" {
return fmt.Errorf("deployment not set")
}
if len(cfg.DeploymentParameters) == 0 {
return fmt.Errorf("deploymentParameters not set")
}
}
if cfg.SubscriptionID == "" {
return fmt.Errorf("subscription ID not set")
}
if cfg.UseManagedIdentityExtension && cfg.UseFederatedWorkloadIdentityExtension {
return fmt.Errorf("you can not combine both managed identity and workload identity as an authentication mechanism")
}
if cfg.VMType != providerazureconsts.VMTypeStandard && cfg.VMType != providerazureconsts.VMTypeVMSS {
return fmt.Errorf("unsupported VM type: %s", cfg.VMType)
}
if !cfg.UseManagedIdentityExtension && !cfg.UseFederatedWorkloadIdentityExtension {
if cfg.TenantID == "" {
return fmt.Errorf("tenant ID not set")
}
switch cfg.AuthMethod {
case "", authMethodPrincipal:
if cfg.AADClientID == "" {
return fmt.Errorf("ARM Client ID not set")
}
case authMethodCLI:
// Nothing to check at the moment.
default:
return fmt.Errorf("unsupported authorization method: %s", cfg.AuthMethod)
}
}
if cfg.CloudProviderBackoff && cfg.CloudProviderBackoffRetries == 0 {
return fmt.Errorf("Cloud provider backoff is enabled but retries are not set")
}
return nil
}
func assignFromEnvIfExists(assignee *string, name string) (bool, error) {
if assignee == nil {
return false, fmt.Errorf("assignee is nil")
}
if val, present := os.LookupEnv(name); present && strings.TrimSpace(val) != "" {
*assignee = strings.TrimSpace(val)
return true, nil
}
return false, nil
}
func assignBoolFromEnvIfExists(assignee *bool, name string) (bool, error) {
if assignee == nil {
return false, fmt.Errorf("assignee is nil")
}
var err error
if val, present := os.LookupEnv(name); present && strings.TrimSpace(val) != "" {
*assignee, err = strconv.ParseBool(val)
if err != nil {
return false, fmt.Errorf("failed to parse %s %q: %v", name, val, err)
}
return true, nil
}
return false, nil
}
func assignIntFromEnvIfExists(assignee *int, name string) (bool, error) {
if assignee == nil {
return false, fmt.Errorf("assignee is nil")
}
var err error
if val, present := os.LookupEnv(name); present && strings.TrimSpace(val) != "" {
*assignee, err = parseInt32(val, 10)
if err != nil {
return false, fmt.Errorf("failed to parse %s %q: %v", name, val, err)
}
return true, nil
}
return false, nil
}
func assignInt64FromEnvIfExists(assignee *int64, name string) (bool, error) {
if assignee == nil {
return false, fmt.Errorf("assignee is nil")
}
var err error
if val, present := os.LookupEnv(name); present && strings.TrimSpace(val) != "" {
*assignee, err = strconv.ParseInt(val, 10, 0)
if err != nil {
return false, fmt.Errorf("failed to parse %s %q: %v", name, val, err)
}
return true, nil
}
return false, nil
}
func assignFloat32FromEnvIfExists(assignee *float32, name string) (bool, error) {
if assignee == nil {
return false, fmt.Errorf("assignee is nil")
}
var err error
if val, present := os.LookupEnv(name); present && strings.TrimSpace(val) != "" {
*assignee, err = parseFloat32(val)
if err != nil {
return false, fmt.Errorf("failed to parse %s %q: %v", name, val, err)
}
return true, nil
}
return false, nil
}
func assignFloat64FromEnvIfExists(assignee *float64, name string) (bool, error) {
if assignee == nil {
return false, fmt.Errorf("assignee is nil")
}
var err error
if val, present := os.LookupEnv(name); present && strings.TrimSpace(val) != "" {
*assignee, err = strconv.ParseFloat(val, 64)
if err != nil {
return false, fmt.Errorf("failed to parse %s %q: %v", name, val, err)
}
return true, nil
}
return false, nil
}