cmd/copyUtil.go (129 lines of code) (raw):
// Copyright © 2017 Microsoft <wastore@microsoft.com>
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
package cmd
import (
"errors"
"fmt"
"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/blob"
"net/url"
"os"
"strings"
"sync"
"github.com/Azure/azure-storage-azcopy/v10/common"
)
// NumOfFilesPerDispatchJobPart is fixed at 10000.
// NOTE(review): judging by the name, this is presumably the number of files batched
// into a single dispatched job part; confirm against the code that consumes it.
const NumOfFilesPerDispatchJobPart = 10000
// parsePatterns splits a semicolon-separated pattern list into its individual patterns.
// Empty segments (for example from ";;" or a trailing ";") are dropped.
// The returned slice is never nil, even when no pattern survives.
func parsePatterns(pattern string) (cookedPatterns []string) {
	cookedPatterns = []string{}
	for _, candidate := range strings.Split(pattern, ";") {
		if candidate == "" {
			// nothing between two separators; ignore it
			continue
		}
		cookedPatterns = append(cookedPatterns, candidate)
	}
	return cookedPatterns
}
// stripTrailingWildcardOnRemoteSource removes a trailing "/*" from the path of a remote source URL.
//
// Returns:
//   - result: the URL with the trailing "/*" removed. The original source is returned unchanged
//     when nothing was stripped, when the container name contains a wildcard, or when an error occurs.
//   - stripTopDir: true when a trailing "/*" was found and removed.
//   - err: non-nil when source cannot be parsed, when a wildcard container name is combined with an
//     object name, or when the path contains a "*" anywhere other than in the trailing "/*".
func stripTrailingWildcardOnRemoteSource(source string, location common.Location) (result string, stripTopDir bool, err error) {
	result = source
	resourceURL, err := url.Parse(result)
	// Check the parse error before touching resourceURL: url.Parse returns a nil *URL on
	// failure, so dereferencing it first would panic instead of reporting the error.
	if err != nil {
		err = fmt.Errorf("failed to parse url %s; %w", result, err)
		return result, stripTopDir, err
	}

	gURLParts := common.NewGenericResourceURLParts(*resourceURL, location)

	if strings.Contains(gURLParts.GetContainerName(), "*") {
		// Disallow container name search and object specifics
		if gURLParts.GetObjectName() != "" {
			err = errors.New("cannot combine a specific object name with an account-level search")
			return result, stripTopDir, err
		}

		// Return immediately here because we know this will be safe.
		return result, stripTopDir, nil
	}

	// Trim the trailing /*. RawPath holds the path as it was encoded in the input, so an
	// escaped asterisk (%2A) is not mistaken for a wildcard here. Path is trimmed alongside
	// it so that the two stay consistent when the URL is re-serialized.
	if strings.HasSuffix(resourceURL.RawPath, "/*") {
		resourceURL.RawPath = strings.TrimSuffix(resourceURL.RawPath, "/*")
		resourceURL.Path = strings.TrimSuffix(resourceURL.Path, "/*")
		stripTopDir = true
	}

	// Ensure there aren't any extra *s floating around.
	if strings.Contains(resourceURL.RawPath, "*") {
		err = errors.New("cannot use wildcards in the path section of the URL except in trailing \"/*\". If you wish to use * in your URL, manually encode it to %2A")
		return result, stripTopDir, err
	}

	result = resourceURL.String()
	return result, stripTopDir, nil
}
// warnIfHasWildcard emits a warning through glcm when value contains a "*" or "?" character.
// The warning names paramName and is routed through oncer, so it is issued at most once per
// sync.Once instance no matter how many times this function is called with it.
func warnIfHasWildcard(oncer *sync.Once, paramName string, value string) {
	if !strings.ContainsAny(value, "*?") {
		// no wildcard characters present; nothing to warn about
		return
	}

	oncer.Do(func() {
		message := fmt.Sprintf("*** Warning *** The %s parameter does not support wildcards. The wildcard "+
			"character provided will be interpreted literally and will not have any wildcard effect. To use wildcards "+
			"(in filenames only, not paths) use include-pattern or exclude-pattern", paramName)
		glcm.Warn(message)
	})
}
// copyHandlerUtil is an empty struct that acts as the receiver for the copy helper methods
// defined in this file.
type copyHandlerUtil struct{}
// gCopyUtil is the package-level copyHandlerUtil value.
// TODO: This needs to be replaced with the anonymous embedded field technique.
var gCopyUtil = copyHandlerUtil{}
// urlIsContainerOrVirtualDirectory reports whether rawURL points at an account, a container or a
// virtual directory, as opposed to a single blob or a prefix match.
// It returns false when rawURL cannot be parsed, either by net/url or by blob.ParseURL.
func (util copyHandlerUtil) urlIsContainerOrVirtualDirectory(rawURL string) bool {
	parsedURL, err := url.Parse(rawURL)
	if err != nil {
		return false
	}

	blobURLParts, err := blob.ParseURL(rawURL)
	if err != nil {
		return false
	}

	urlPath := parsedURL.Path
	ipEndpointStyle := blobURLParts.IPEndpointStyleInfo.AccountName != ""

	// Typical endpoint style with no path at all: this is certainly an account-level URL.
	if !ipEndpointStyle && len(urlPath) == 0 {
		return true
	}

	// A trailing slash always denotes a virtual directory or container.
	if strings.HasSuffix(urlPath, "/") {
		return true
	}

	// Count the separators that follow the leading slash.
	//   Typical style:     /container           -> 0 separators means a container.
	//   IP endpoint style: /accountname/container (https://IP:port/accountname/container)
	//                      -> the account name takes one segment, so up to 1 separator
	//                         still means a container.
	// Anything deeper is treated as a blob.
	allowedSeparators := 0
	if ipEndpointStyle {
		allowedSeparators = 1
	}
	return strings.Count(urlPath[1:], "/") <= allowedSeparators
}
// redactSigQueryParam checks for the signature parameter (common.SigAzure) in the given raw query
// part of a URL. If the signature exists, its value is replaced with "REDACTED".
// This api is used when SAS is written to log file to avoid exposing the user given SAS.
//
// rawQuery may be given with or without a leading "?"; url.URL.RawQuery, for instance, never
// carries one, so a signature in the first position has to be recognised without any
// "?" or "&" in front of it.
//
// Returns:
//   - whether a signature parameter was detected
//   - the lowercased rawQuery when no signature was detected; otherwise the re-encoded query
//     (url.Values.Encode output, without a leading "?") with the signature value redacted
//
// TODO: remove this, redactSigQueryParam could be added in SDK
func (util copyHandlerUtil) redactSigQueryParam(rawQuery string) (bool, string) {
	// Lowercase the string so the signature key can be matched regardless of the case the user typed.
	// NOTE(review): this also lowercases every other key and value in the returned query. That is
	// kept as-is because callers may rely on it, but it is lossy for case-sensitive values.
	// Assumes common.SigAzure is itself lowercase, since it is compared against the lowercased query.
	rawQuery = strings.ToLower(rawQuery)

	sigAssignment := common.SigAzure + "="
	sigFound := strings.HasPrefix(rawQuery, sigAssignment) || // signature is the first parameter, no leading "?"
		strings.Contains(rawQuery, "?"+sigAssignment) ||
		strings.Contains(rawQuery, "&"+sigAssignment)
	if !sigFound {
		return false, rawQuery // signature not found; nothing to redact
	}

	// Signature found, redact its value. A leading "?" is dropped before parsing, otherwise
	// ParseQuery would treat it as part of the first key and that key would never match below.
	// The ParseQuery error is deliberately ignored: it still returns every well-formed pair, and
	// malformed pairs are simply left out of the re-encoded output.
	values, _ := url.ParseQuery(strings.TrimPrefix(rawQuery, "?"))
	for name := range values {
		if strings.EqualFold(name, common.SigAzure) {
			values[name] = []string{"REDACTED"}
		}
	}

	return true, values.Encode()
}
// ConstructCommandStringFromArgs rebuilds the command line the user typed from os.Args,
// leaving out the executable path. Every argument is followed by a single space.
// Arguments that begin with "http" (compared case-insensitively) are treated as URLs and their
// query string is passed through redactSigQueryParam before being written out.
// It returns "" when no arguments were given and panics when such an argument cannot be parsed as a URL.
func (util copyHandlerUtil) ConstructCommandStringFromArgs() string {
	// os.Args[0] is the path of the AzCopy executable; only what follows is of interest.
	userArgs := os.Args[1:]
	if len(userArgs) == 0 {
		return ""
	}

	var commandLine strings.Builder
	for i := range userArgs {
		rendered := userArgs[i]

		// An argument starting with http is either the remote source or the remote destination,
		// and may carry a signature that must not end up in the output.
		if startsWith(rendered, "http") {
			parsedArg, err := url.Parse(rendered)
			if err != nil {
				// the argument cannot be parsed as a URL; surface that as a panic
				panic(fmt.Errorf("error parsing the url %s. Failed with error %s", rendered, err.Error()))
			}

			// Swap in the query string with the signature value redacted.
			_, parsedArg.RawQuery = util.redactSigQueryParam(parsedArg.RawQuery)
			rendered = parsedArg.String()
		}

		commandLine.WriteString(rendered)
		commandLine.WriteString(" ")
	}

	return commandLine.String()
}
// doesBlobRepresentAFolder reports whether the given blob metadata marks the blob as a folder.
// It looks up the entry named by common.POSIXFolderMeta via common.TryReadMetadata and returns
// true only when that entry exists, is non-nil, and equals "true" ignoring letter case.
//
// Background: this handles the WASB V1 directory structure. The HDFS driver creates a blob for
// an empty directory (say 'myfolder'), names the blobs beneath it 'myfolder/myblob', and tags the
// directory blob with the metadata 'hdi_isfolder = true'.
// Note: GoLang sometimes sets metadata keys with the first letter capitalized.
func (util copyHandlerUtil) doesBlobRepresentAFolder(metadata map[string]*string) bool {
	folderFlag, found := common.TryReadMetadata(metadata, common.POSIXFolderMeta)
	if !found || folderFlag == nil {
		return false
	}
	return strings.ToLower(*folderFlag) == "true"
}
// startsWith reports whether s begins with t, comparing under Unicode case-folding.
// The comparison is made against the first len(t) bytes of s.
func startsWith(s string, t string) bool {
	if len(t) > len(s) {
		// s is too short to contain t at all
		return false
	}
	head := s[:len(t)]
	return strings.EqualFold(head, t)
}
/////////////////////////////////////////////////////////////////////////////////////////////////
// s3URLPartsExtension embeds common.S3URLParts, so the embedded type's fields and methods are
// promoted onto it; it declares no additional fields of its own here.
type s3URLPartsExtension struct {
common.S3URLParts
}