cmd/credentialUtil.go (426 lines of code) (raw):

// Copyright © 2017 Microsoft <wastore@microsoft.com>
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

// This file contains credential utils used only in cmd module.

package cmd

import (
	"context"
	"errors"
	"fmt"
	"net/http"
	"net/url"
	"strings"
	"sync"

	"github.com/Azure/azure-sdk-for-go/sdk/azcore"
	"github.com/Azure/azure-sdk-for-go/sdk/azcore/policy"
	"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/blob"
	"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/container"
	"github.com/Azure/azure-storage-azcopy/v10/jobsAdmin"

	"github.com/Azure/azure-storage-azcopy/v10/common"
	"github.com/Azure/azure-storage-azcopy/v10/ste"

	"github.com/minio/minio-go/pkg/s3utils"
)

// once guards the lazy creation of the per-user OAuth token manager.
var once sync.Once

// autoOAuth guards the one-time auto-login attempt driven by environment variables.
var autoOAuth sync.Once

// sharedKeyDeprecation ensures the shared-key deprecation warning is emitted at most once.
var sharedKeyDeprecation sync.Once

// sharedKeyDeprecationMessage is the warning text shown when shared key auth is used for datalake.
var sharedKeyDeprecationMessage = "*** WARNING *** shared key authentication for datalake is deprecated and will be removed in a future release. Please use shared access signature (SAS) or OAuth for authentication."
func warnIfSharedKeyAuthForDatalake() { sharedKeyDeprecation.Do(func() { glcm.Warn(sharedKeyDeprecationMessage) jobsAdmin.JobsAdmin.LogToJobLog(sharedKeyDeprecationMessage, common.LogWarning) }) } // only one UserOAuthTokenManager should exists in azcopy-v2 process in cmd(FE) module for current user. // (given appAppPathFolder is mapped to current user) var currentUserOAuthTokenManager *common.UserOAuthTokenManager const oauthLoginSessionCacheKeyName = "AzCopyOAuthTokenCache" const oauthLoginSessionCacheServiceName = "AzCopyV10" const oauthLoginSessionCacheAccountName = "AzCopyOAuthTokenCache" // GetUserOAuthTokenManagerInstance gets or creates OAuthTokenManager for current user. // Note: Currently, only support to have TokenManager for one user mapping to one tenantID. func GetUserOAuthTokenManagerInstance() *common.UserOAuthTokenManager { once.Do(func() { if common.AzcopyJobPlanFolder == "" { panic("invalid state, AzcopyJobPlanFolder should not be an empty string") } cacheName := common.GetEnvironmentVariable(common.EEnvironmentVariable.LoginCacheName()) currentUserOAuthTokenManager = common.NewUserOAuthTokenManagerInstance(common.CredCacheOptions{ DPAPIFilePath: common.AzcopyJobPlanFolder, KeyName: common.Iff(cacheName != "", cacheName, oauthLoginSessionCacheKeyName), ServiceName: oauthLoginSessionCacheServiceName, AccountName: common.Iff(cacheName != "", cacheName, oauthLoginSessionCacheAccountName), }) }) return currentUserOAuthTokenManager } /* * GetInstanceOAuthTokenInfo returns OAuth token, obtained by auto-login, * for current instance of AzCopy. 
*/ func GetOAuthTokenManagerInstance() (*common.UserOAuthTokenManager, error) { var err error autoOAuth.Do(func() { var lca loginCmdArgs autoLoginType := strings.ToLower(common.GetEnvironmentVariable(common.EEnvironmentVariable.AutoLoginType())) if autoLoginType == "" { glcm.Info("Autologin not specified.") return } if tenantID := common.GetEnvironmentVariable(common.EEnvironmentVariable.TenantID()); tenantID != "" { lca.tenantID = tenantID } if endpoint := common.GetEnvironmentVariable(common.EEnvironmentVariable.AADEndpoint()); endpoint != "" { lca.aadEndpoint = endpoint } // Fill up lca lca.loginType = autoLoginType switch autoLoginType { case common.EAutoLoginType.SPN().String(): lca.applicationID = common.GetEnvironmentVariable(common.EEnvironmentVariable.ApplicationID()) lca.certPath = common.GetEnvironmentVariable(common.EEnvironmentVariable.CertificatePath()) lca.certPass = common.GetEnvironmentVariable(common.EEnvironmentVariable.CertificatePassword()) lca.clientSecret = common.GetEnvironmentVariable(common.EEnvironmentVariable.ClientSecret()) case common.EAutoLoginType.MSI().String(): lca.identityClientID = common.GetEnvironmentVariable(common.EEnvironmentVariable.ManagedIdentityClientID()) lca.identityObjectID = common.GetEnvironmentVariable(common.EEnvironmentVariable.ManagedIdentityObjectID()) lca.identityResourceID = common.GetEnvironmentVariable(common.EEnvironmentVariable.ManagedIdentityResourceString()) case common.EAutoLoginType.Device().String(): case common.EAutoLoginType.AzCLI().String(): case common.EAutoLoginType.PsCred().String(): case common.EAutoLoginType.Workload().String(): default: glcm.Error("Invalid Auto-login type specified: " + autoLoginType) return } lca.persistToken = false if err = lca.process(); err != nil { glcm.Error(fmt.Sprintf("Failed to perform Auto-login: %v.", err.Error())) } }) if err != nil { return nil, err } return GetUserOAuthTokenManagerInstance(), nil } var announceOAuthTokenOnce sync.Once func oAuthTokenExists() 
(oauthTokenExists bool) { // Note: Environment variable for OAuth token should only be used in testing, or the case user clearly now how to protect // the tokens if common.EnvVarOAuthTokenInfoExists() { announceOAuthTokenOnce.Do( func() { glcm.Info(fmt.Sprintf("%v is set.", common.EnvVarOAuthTokenInfo)) // Log the case when env var is set, as it's rare case. }, ) oauthTokenExists = true } uotm, err := GetOAuthTokenManagerInstance() if err != nil { oauthTokenExists = false return } if hasCachedToken, err := uotm.HasCachedToken(); hasCachedToken { oauthTokenExists = true } else if err != nil { //nolint:staticcheck // Log the error if fail to get cached token, as these are unhandled errors, and should not influence the logic flow. // Uncomment for debugging. // glcm.Info(fmt.Sprintf("No cached token found, %v", err)) } return } var stashedEnvCredType = "" // GetCredTypeFromEnvVar tries to get credential type from environment variable defined by envVarCredentialType. func GetCredTypeFromEnvVar() common.CredentialType { rawVal := stashedEnvCredType if stashedEnvCredType == "" { rawVal = common.GetEnvironmentVariable(common.EEnvironmentVariable.CredentialType()) if rawVal == "" { return common.ECredentialType.Unknown() } stashedEnvCredType = rawVal } // Remove the env var after successfully fetching once, // in case of env var is further spreading into child processes unexpectedly. common.ClearEnvironmentVariable(common.EEnvironmentVariable.CredentialType()) // Try to get the value set. 
var credType common.CredentialType if err := credType.Parse(rawVal); err != nil { return common.ECredentialType.Unknown() } return credType } type rawFromToInfo struct { fromTo common.FromTo source, destination common.ResourceString } const trustedSuffixesNameAAD = "trusted-microsoft-suffixes" const trustedSuffixesAAD = "*.core.windows.net;*.core.chinacloudapi.cn;*.core.cloudapi.de;*.core.usgovcloudapi.net;*.storage.azure.net" // checkAuthSafeForTarget checks our "implicit" auth types (those that pick up creds from the environment // or a prior login) to make sure they are only being used in places where we know those auth types are safe. // This prevents, for example, us accidentally sending OAuth creds to some place they don't belong func checkAuthSafeForTarget(ct common.CredentialType, resource, extraSuffixesAAD string, resourceType common.Location) error { getSuffixes := func(list string, extras string) []string { extras = strings.Trim(extras, " ") if extras != "" { list += ";" + extras } return strings.Split(list, ";") } isResourceInSuffixList := func(suffixes []string) (string, bool) { u, err := url.Parse(resource) if err != nil { return "<unparsable>", false } host := strings.ToLower(u.Host) for _, s := range suffixes { s = strings.Trim(s, " *") // trim *.foo to .foo s = strings.ToLower(s) if strings.HasSuffix(host, s) { return host, true } } return host, false } switch ct { case common.ECredentialType.Unknown(), common.ECredentialType.Anonymous(): // these auth types don't pick up anything from environment vars, so they are not the focus of this routine return nil case common.ECredentialType.OAuthToken(), common.ECredentialType.MDOAuthToken(), common.ECredentialType.SharedKey(): // Files doesn't currently support OAuth, but it's a valid azure endpoint anyway, so it'll pass the check. 
if resourceType != common.ELocation.Blob() && resourceType != common.ELocation.BlobFS() && resourceType != common.ELocation.File() { // There may be a reason for files->blob to specify this. if resourceType == common.ELocation.Local() { return nil } return fmt.Errorf("azure OAuth authentication to %s is not enabled in AzCopy", resourceType.String()) } // these are Azure auth types, so make sure the resource is known to be in Azure domainSuffixes := getSuffixes(trustedSuffixesAAD, extraSuffixesAAD) if host, ok := isResourceInSuffixList(domainSuffixes); !ok { return fmt.Errorf( "the URL requires authentication. If this URL is in fact an Azure service, you can enable Azure authentication to %s. "+ "To enable, view the documentation for "+ "the parameter --%s, by running 'AzCopy copy --help'. BUT if this URL is not an Azure service, do NOT enable Azure authentication to it. "+ "Instead, see if the URL host supports authentication by way of a token that can be included in the URL's query string", // E.g. CDN apparently supports a non-SAS type of token as noted here: https://docs.microsoft.com/en-us/azure/cdn/cdn-token-auth#setting-up-token-authentication // Including such a token in the URL will cause AzCopy to see it as a "public" URL (since the URL on its own will pass // our "isPublic" access tests, which run before this routine). host, trustedSuffixesNameAAD) } case common.ECredentialType.S3AccessKey(): if resourceType != common.ELocation.S3() { //noinspection ALL return fmt.Errorf("S3 access key authentication to %s is not enabled in AzCopy", resourceType.String()) } // just check with minio. No need to have our own list of S3 domains, since minio effectively // has that list already, we can't talk to anything outside that list because minio won't let us, // and the parsing of s3 URL is non-trivial. E.g. can't just look for the ending since // something like https://someApi.execute-api.someRegion.amazonaws.com is AWS but is a customer- // written code, not S3. 
ok := false host := "<unparsable url>" u, err := url.Parse(resource) if err == nil { host = u.Host parts, err := common.NewS3URLParts(*u) // strip any leading bucket name from URL, to get an endpoint we can pass to s3utils if err == nil { u, err := url.Parse("https://" + parts.Endpoint) ok = err == nil && s3utils.IsAmazonEndpoint(*u) } } if !ok { return fmt.Errorf( "s3 authentication to %s is not currently supported in AzCopy", host) } case common.ECredentialType.GoogleAppCredentials(): if resourceType != common.ELocation.GCP() { return fmt.Errorf("Google Application Credentials to %s is not valid", resourceType.String()) } u, err := url.Parse(resource) if err == nil { host := u.Host _, err := common.NewGCPURLParts(*u) if err != nil { return fmt.Errorf("GCP authentication to %s is not currently supported", host) } } default: panic("unknown credential type") } return nil } func logAuthType(ct common.CredentialType, location common.Location, isSource bool) { if location == common.ELocation.Unknown() { return // nothing to log } else if location.IsLocal() { return // don't log local ones, no point } else if ct == common.ECredentialType.Anonymous() { return // don't log these either (too cluttered and auth type is obvious from the URL) } resource := "destination" if isSource { resource = "source" } name := ct.String() if ct == common.ECredentialType.OAuthToken() { name = "Azure AD" // clarify the name to something users will recognize } else if ct == common.ECredentialType.MDOAuthToken() { name = "Azure AD (Managed Disk)" } message := fmt.Sprintf("Authenticating to %s using %s", resource, name) if ct == common.ECredentialType.Unknown() && location.IsAzure() { message += ", Please authenticate using Microsoft Entra ID (https://aka.ms/AzCopy/AuthZ), use AzCopy login, or append a SAS token to your Azure URL." 
} if _, exists := authMessagesAlreadyLogged.Load(message); !exists { authMessagesAlreadyLogged.Store(message, struct{}{}) // dedup because source is auth'd by both enumerator and STE if jobsAdmin.JobsAdmin != nil { jobsAdmin.JobsAdmin.LogToJobLog(message, common.LogInfo) } glcm.Info(message) } } var authMessagesAlreadyLogged = &sync.Map{} // isPublic reports true if the Blob URL passed can be read without auth. func isPublic(ctx context.Context, blobResourceURL string, cpkOptions common.CpkOptions) (isPublicResource bool) { bURLParts, err := blob.ParseURL(blobResourceURL) if err != nil { return false } if bURLParts.ContainerName == "" || strings.Contains(bURLParts.ContainerName, "*") { // Service level searches can't possibly be public. return false } // This request will not be logged. This can fail, and too many Cx do not like this. clientOptions := ste.NewClientOptions(policy.RetryOptions{ MaxRetries: ste.UploadMaxTries, TryTimeout: ste.UploadTryTimeout, RetryDelay: ste.UploadRetryDelay, MaxRetryDelay: ste.UploadMaxRetryDelay, }, policy.TelemetryOptions{ ApplicationID: common.AddUserAgentPrefix(common.UserAgent), }, nil, ste.LogOptions{}, nil, nil) blobClient, _ := blob.NewClientWithNoCredential(bURLParts.String(), &blob.ClientOptions{ClientOptions: clientOptions}) bURLParts.BlobName = "" bURLParts.Snapshot = "" bURLParts.VersionID = "" // Scenario 1: When resourceURL points to a container or a virtual directory // Check if the virtual directory is accessible by doing GetProperties on container. // Virtual directory can be public only when its parent container is public. 
containerClient, _ := container.NewClientWithNoCredential(bURLParts.String(), &container.ClientOptions{ClientOptions: clientOptions}) if _, err := containerClient.GetProperties(ctx, nil); err == nil { return true } // Scenario 2: When resourceURL points to a blob if _, err := blobClient.GetProperties(ctx, &blob.GetPropertiesOptions{CPKInfo: cpkOptions.GetCPKInfo()}); err == nil { return true } return false } // mdAccountNeedsOAuth pings the passed in md account, and checks if we need additional token with Disk-socpe func mdAccountNeedsOAuth(ctx context.Context, blobResourceURL string, cpkOptions common.CpkOptions) bool { // This request will not be logged. This can fail, and too many Cx do not like this. clientOptions := ste.NewClientOptions(policy.RetryOptions{ MaxRetries: ste.UploadMaxTries, TryTimeout: ste.UploadTryTimeout, RetryDelay: ste.UploadRetryDelay, MaxRetryDelay: ste.UploadMaxRetryDelay, }, policy.TelemetryOptions{ ApplicationID: common.AddUserAgentPrefix(common.UserAgent), }, nil, ste.LogOptions{}, nil, nil) blobClient, _ := blob.NewClientWithNoCredential(blobResourceURL, &blob.ClientOptions{ClientOptions: clientOptions}) _, err := blobClient.GetProperties(ctx, &blob.GetPropertiesOptions{CPKInfo: cpkOptions.GetCPKInfo()}) if err == nil { return false } var respErr *azcore.ResponseError if errors.As(err, &respErr) { if respErr.StatusCode == 401 || respErr.StatusCode == 403 { // *sometimes* the service can return 403s. 
challenge := respErr.RawResponse.Header.Get("WWW-Authenticate") if strings.Contains(challenge, common.MDResource) { return true } } } return false } func getCredentialTypeForLocation(ctx context.Context, location common.Location, resource common.ResourceString, isSource bool, cpkOptions common.CpkOptions) (credType common.CredentialType, isPublic bool, err error) { return doGetCredentialTypeForLocation(ctx, location, resource, isSource, GetCredTypeFromEnvVar, cpkOptions) } func doGetCredentialTypeForLocation(ctx context.Context, location common.Location, resource common.ResourceString, isSource bool, getForcedCredType func() common.CredentialType, cpkOptions common.CpkOptions) (credType common.CredentialType, public bool, err error) { public = false err = nil switch location { case common.ELocation.Local(), common.ELocation.Benchmark(), common.ELocation.None(), common.ELocation.Pipe(): return common.ECredentialType.Anonymous(), false, nil } defer func() { logAuthType(credType, location, isSource) }() // caution: If auth-type is unsafe, below defer statement will change the return value credType defer func() { if err != nil { return } if err = checkAuthSafeForTarget(credType, resource.Value, cmdLineExtraSuffixesAAD, location); err != nil { credType = common.ECredentialType.Unknown() public = false } }() if getForcedCredType() != common.ECredentialType.Unknown() && location != common.ELocation.S3() && location != common.ELocation.GCP() { credType = getForcedCredType() return } if location == common.ELocation.S3() { accessKeyID := common.GetEnvironmentVariable(common.EEnvironmentVariable.AWSAccessKeyID()) secretAccessKey := common.GetEnvironmentVariable(common.EEnvironmentVariable.AWSSecretAccessKey()) if accessKeyID == "" || secretAccessKey == "" { credType = common.ECredentialType.S3PublicBucket() public = true return } credType = common.ECredentialType.S3AccessKey() return } if location == common.ELocation.GCP() { googleAppCredentials := 
common.GetEnvironmentVariable(common.EEnvironmentVariable.GoogleAppCredentials()) if googleAppCredentials == "" { return common.ECredentialType.Unknown(), false, errors.New("GOOGLE_APPLICATION_CREDENTIALS environment variable must be set before using GCP transfer feature") } credType = common.ECredentialType.GoogleAppCredentials() return } // Special blob destinations - public and MD account needing oAuth if location == common.ELocation.Blob() { uri, _ := resource.FullURL() if isSource && resource.SAS == "" && isPublic(ctx, uri.String(), cpkOptions) { credType = common.ECredentialType.Anonymous() public = true return } if strings.HasPrefix(uri.Host, "md-") && mdAccountNeedsOAuth(ctx, uri.String(), cpkOptions) { if !oAuthTokenExists() { return common.ECredentialType.Unknown(), false, common.NewAzError(common.EAzError.LoginCredMissing(), "No SAS token or OAuth token is present and the resource is not public") } credType = common.ECredentialType.MDOAuthToken() return } } if resource.SAS != "" { credType = common.ECredentialType.Anonymous() return } if oAuthTokenExists() { credType = common.ECredentialType.OAuthToken() return } // BlobFS currently supports Shared key. Remove this piece of code, once // we deprecate that. if location == common.ELocation.BlobFS() { name := common.GetEnvironmentVariable(common.EEnvironmentVariable.AccountName()) key := common.GetEnvironmentVariable(common.EEnvironmentVariable.AccountKey()) if name != "" && key != "" { // TODO: To remove, use for internal testing, SharedKey should not be supported from commandline credType = common.ECredentialType.SharedKey() warnIfSharedKeyAuthForDatalake() } } // We may not always use the OAuth token on Managed Disks. As such, we should change to the type indicating the potential for use. 
// if mdAccount && credType == common.ECredentialType.OAuthToken() { // credType = common.ECredentialType.MDOAuthToken() // } return } func GetCredentialInfoForLocation(ctx context.Context, location common.Location, resource common.ResourceString, isSource bool, cpkOptions common.CpkOptions) (credInfo common.CredentialInfo, isPublic bool, err error) { // get the type credInfo.CredentialType, isPublic, err = getCredentialTypeForLocation(ctx, location, resource, isSource, cpkOptions) // flesh out the rest of the fields, for those types that require it if credInfo.CredentialType.IsAzureOAuth() { uotm := GetUserOAuthTokenManagerInstance() if tokenInfo, err := uotm.GetTokenInfo(ctx); err != nil { return credInfo, false, err } else { credInfo.OAuthTokenInfo = *tokenInfo } } return } // getCredentialType checks user provided info, and gets the proper credential type // for current command. // TODO: consider replace with calls to getCredentialInfoForLocation // (right now, we have tweaked this to be a wrapper for that function, but really should remove this one totally) func getCredentialType(ctx context.Context, raw rawFromToInfo, cpkOptions common.CpkOptions) (credType common.CredentialType, err error) { switch { case raw.fromTo.To().IsRemote(): // we authenticate to the destination. Source is assumed to be SAS, or public, or a local resource credType, _, err = getCredentialTypeForLocation(ctx, raw.fromTo.To(), raw.destination, false, common.CpkOptions{}) case raw.fromTo == common.EFromTo.BlobTrash() || raw.fromTo == common.EFromTo.BlobFSTrash() || raw.fromTo == common.EFromTo.FileTrash(): // For to Trash direction, use source as resource URL // Also, by setting isSource=false we inform getCredentialTypeForLocation() that resource // being deleted cannot be public. credType, _, err = getCredentialTypeForLocation(ctx, raw.fromTo.From(), raw.source, false, cpkOptions) case raw.fromTo.From().IsRemote() && raw.fromTo.To().IsLocal(): // we authenticate to the source. 
credType, _, err = getCredentialTypeForLocation(ctx, raw.fromTo.From(), raw.source, true, cpkOptions) default: credType = common.ECredentialType.Anonymous() // Log the FromTo types which getCredentialType hasn't solved, in case of miss-use. glcm.Info(fmt.Sprintf("Use anonymous credential by default for from-to '%v'", raw.fromTo)) } return } // ============================================================================================== // pipeline factory methods // ============================================================================================== // createClientOptions creates generic client options which are required to create any // client to interact with storage service. Default options are modified to suit azcopy. // srcCred is required in cases where source is authenticated via oAuth for S2S transfers func createClientOptions(logger common.ILoggerResetable, srcCred *common.ScopedToken, reauthCred *common.ScopedAuthenticator) azcore.ClientOptions { logOptions := ste.LogOptions{} if logger != nil { logOptions.RequestLogOptions.SyslogDisabled = common.IsForceLoggingDisabled() logOptions.Log = logger.Log logOptions.ShouldLog = logger.ShouldLog } return ste.NewClientOptions(policy.RetryOptions{ MaxRetries: ste.UploadMaxTries, TryTimeout: ste.UploadTryTimeout, RetryDelay: ste.UploadRetryDelay, MaxRetryDelay: ste.UploadMaxRetryDelay, }, policy.TelemetryOptions{ ApplicationID: common.AddUserAgentPrefix(common.UserAgent), }, ste.NewAzcopyHTTPClient(frontEndMaxIdleConnectionsPerHost), logOptions, srcCred, reauthCred) } const frontEndMaxIdleConnectionsPerHost = http.DefaultMaxIdleConnsPerHost