cmd/benchmark.go (256 lines of code) (raw):
// Copyright © Microsoft <wastore@microsoft.com>
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
package cmd
import (
"errors"
"fmt"
"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/blob"
"github.com/Azure/azure-sdk-for-go/sdk/storage/azdatalake"
sharefile "github.com/Azure/azure-sdk-for-go/sdk/storage/azfile/file"
"os"
"strconv"
"strings"
"github.com/Azure/azure-storage-azcopy/v10/common"
"github.com/spf13/cobra"
)
// represents the raw benchmark command input from the user
type rawBenchmarkCmdArgs struct {
// The destination/src endpoint we are benchmarking.
target string
// parameters controlling the auto-generated data
sizePerFile string
fileCount uint
deleteTestData bool
numOfFolders uint
// options from flags
blockSizeMB float64
putBlobSizeMB float64
putMd5 bool
checkLength bool
blobType string
output string
mode string
}
const (
maxBytesPerFile = 4.75 * 1024 * 1024 * 1024 * 1024
sizeStringDescription = "a number immediately followed by K, M or G. E.g. 12k or 200G"
)
func ParseSizeString(s string, name string) (int64, error) {
message := name + " must be " + sizeStringDescription
if strings.Contains(s, " ") {
return 0, errors.New(message)
}
if len(s) < 2 {
return 0, errors.New(message)
}
n, err := strconv.Atoi(s[:len(s)-1])
if err != nil {
return 0, errors.New(message)
}
suffix := strings.ToLower(s[len(s)-1:])
bytes := int64(0)
switch suffix {
case "k":
bytes = int64(n) * 1024
case "m":
bytes = int64(n) * 1024 * 1024
case "g":
bytes = int64(n) * 1024 * 1024 * 1024
default:
return 0, errors.New(message)
}
return bytes, nil
}
// validates and transform raw input into cooked input
// raw benchmark args cook into copyArgs, because the actual work
// of a benchmark job is doing a copy. Benchmark just doesn't offer so many
// choices in its raw args
func (raw rawBenchmarkCmdArgs) cook() (CookedCopyCmdArgs, error) {
glcm.Info(common.BenchmarkPreviewNotice)
dummyCooked := CookedCopyCmdArgs{}
virtualDir := "benchmark-" + azcopyCurrentJobID.String() // create unique directory name, so we won't overwrite anything
if raw.fileCount <= 0 {
return dummyCooked, errors.New(common.FileCountParam + " must be greater than zero")
}
bytesPerFile, err := ParseSizeString(raw.sizePerFile, common.SizePerFileParam)
if err != nil {
return dummyCooked, err
}
if bytesPerFile <= 0 {
return dummyCooked, errors.New(common.SizePerFileParam + " must be greater than zero")
}
if bytesPerFile > maxBytesPerFile {
return dummyCooked, errors.New("file size too big")
}
// transcribe everything to copy args
c := rawCopyCmdArgs{}
c.setMandatoryDefaults()
benchMode := common.BenchMarkMode(0)
err = benchMode.Parse(raw.mode)
if err != nil {
return dummyCooked, err
}
downloadMode := benchMode == common.EBenchMarkMode.Download()
if downloadMode {
//We to write to NULL device, so our measurements are not masked by disk perf
c.dst = os.DevNull
c.src = raw.target
} else { // Upload
// src must be string, but needs to indicate that its for benchmark and encode what we want
c.src = benchmarkSourceHelper{}.ToUrl(raw.fileCount, bytesPerFile, raw.numOfFolders)
c.dst, err = raw.appendVirtualDir(raw.target, virtualDir)
if err != nil {
return dummyCooked, err
}
}
c.recursive = true // because source is directory-like, in which case recursive is required
c.internalOverrideStripTopDir = true // we don't want to append an extra strange name filled with meta characters at the destination
c.forceWrite = common.EOverwriteOption.True().String() // don't want the extra round trip (for overwrite check) when benchmarking
c.blockSizeMB = raw.blockSizeMB
c.putBlobSizeMB = raw.putBlobSizeMB
c.putMd5 = raw.putMd5
c.CheckLength = raw.checkLength
c.blobType = raw.blobType
c.output = raw.output
cooked, err := c.cook()
if err != nil {
return cooked, err
}
if downloadMode {
glcm.Info(fmt.Sprintf("Benchmarking downloads from %s.", cooked.Source.Value))
} else {
glcm.Info(fmt.Sprintf("Benchmarking uploads to %s.", cooked.Destination.Value))
}
if !downloadMode && raw.deleteTestData {
// set up automatic cleanup
cooked.followupJobArgs, err = raw.createCleanupJobArgs(cooked.Destination, logVerbosityRaw)
if err != nil {
return dummyCooked, err
}
}
return cooked, nil
}
func (raw rawBenchmarkCmdArgs) appendVirtualDir(target, virtualDir string) (string, error) {
switch InferArgumentLocation(target) {
case common.ELocation.Blob():
p, err := blob.ParseURL(target)
if err != nil {
return "", fmt.Errorf("error parsing the url %s. Failed with error %s", target, err.Error())
}
if p.ContainerName == "" || p.BlobName != "" {
return "", errors.New("the blob target must be a container")
}
p.BlobName = virtualDir
return p.String(), err
case common.ELocation.File():
p, err := sharefile.ParseURL(target)
if err != nil {
return "", fmt.Errorf("error parsing the url %s. Failed with error %s", target, err.Error())
}
if p.ShareName == "" || p.DirectoryOrFilePath != "" {
return "", errors.New("the file share target must be a file share root")
}
p.DirectoryOrFilePath = virtualDir
return p.String(), err
case common.ELocation.BlobFS():
p, err := azdatalake.ParseURL(target)
if err != nil {
return "", fmt.Errorf("error parsing the url %s. Failed with error %s", target, err.Error())
}
if p.FileSystemName == "" || p.PathName != "" {
return "", errors.New("the blobFS target must be a filesystem")
}
p.PathName = virtualDir
return p.String(), err
default:
return "", errors.New("benchmarking only supports https connections to Blob, Azure Files, and ADLS Gen2")
}
}
// define a cleanup job
func (raw rawBenchmarkCmdArgs) createCleanupJobArgs(benchmarkDest common.ResourceString, logVerbosity string) (*CookedCopyCmdArgs, error) {
rc := rawCopyCmdArgs{}
u, _ := benchmarkDest.FullURL() // don't check error, because it was parsed already in main job
rc.src = u.String() // the SOURCE for the deletion is the the dest from the benchmark
rc.recursive = true
switch InferArgumentLocation(rc.src) {
case common.ELocation.Blob():
rc.fromTo = common.EFromTo.BlobTrash().String()
case common.ELocation.File():
rc.fromTo = common.EFromTo.FileTrash().String()
case common.ELocation.BlobFS():
rc.fromTo = common.EFromTo.BlobFSTrash().String()
default:
return nil, errors.New("unsupported from-to for cleanup") // should never make it this far, due to earlier validation
}
rc.setMandatoryDefaults()
cooked, err := rc.cook()
cooked.jobID = common.NewJobID() // Override the job ID that cook gave us-- That would cause us to fail deletion.
cooked.isCleanupJob = true
cooked.cleanupJobMessage = "Running cleanup job to delete files created during benchmarking"
return &cooked, err
}
type benchmarkSourceHelper struct{}
// our code requires sources to be strings. So we may as well do the benchmark sources as URLs
// so we can identify then as such using a specific domain. ".invalid" is reserved globally for cases where
// you want a URL that can't possibly be a real one, so we'll use that
const benchmarkSourceHost = "benchmark.invalid"
func (h benchmarkSourceHelper) ToUrl(fileCount uint, bytesPerFile int64, numOfFolders uint) string {
return fmt.Sprintf("https://%s?fc=%d&bpf=%d&nf=%d", benchmarkSourceHost, fileCount, bytesPerFile, numOfFolders)
}
func (h benchmarkSourceHelper) FromUrl(s string) (fileCount uint, bytesPerFile int64, numOfFolders uint, err error) {
// TODO: consider replace with regex?
expectedPrefix := "https://" + benchmarkSourceHost + "?"
if !strings.HasPrefix(s, expectedPrefix) {
return 0, 0, 0, errors.New("invalid benchmark source string")
}
s = strings.TrimPrefix(s, expectedPrefix)
pieces := strings.Split(s, "&")
if len(pieces) != 3 ||
!strings.HasPrefix(pieces[0], "fc=") ||
!strings.HasPrefix(pieces[1], "bpf=") ||
!strings.HasPrefix(pieces[2], "nf=") {
return 0, 0, 0, errors.New("invalid benchmark source string")
}
pieces[0] = strings.Split(pieces[0], "=")[1]
pieces[1] = strings.Split(pieces[1], "=")[1]
pieces[2] = strings.Split(pieces[2], "=")[1]
fc, err := strconv.ParseUint(pieces[0], 10, 32)
if err != nil {
return 0, 0, 0, err
}
bpf, err := strconv.ParseInt(pieces[1], 10, 64)
if err != nil {
return 0, 0, 0, err
}
nf, err := strconv.ParseUint(pieces[2], 10, 32)
if err != nil {
return 0, 0, 0, err
}
return uint(fc), bpf, uint(nf), nil
}
var benchCmd *cobra.Command
func init() {
raw := rawBenchmarkCmdArgs{}
// benCmd represents the bench command
benchCmd = &cobra.Command{
Use: "bench [destination]",
Aliases: []string{"ben", "benchmark"},
SuggestFor: []string{"b", "bn"},
Short: benchCmdShortDescription,
Long: benchCmdLongDescription,
Example: benchCmdExample,
Args: func(cmd *cobra.Command, args []string) error {
// TODO: if/when we support benchmarking for S2S, note that the current code to set userAgent string in
// jobPartMgr will need to be changed if we want it to still set the benchmarking suffix for S2S
if len(args) == 1 {
raw.target = args[0]
} else {
return errors.New("wrong number of arguments, please refer to the help page on usage of this command")
}
return nil
},
Run: func(cmd *cobra.Command, args []string) {
var cooked CookedCopyCmdArgs // benchmark args cook into copy args
cooked, err := raw.cook()
if err != nil {
glcm.Error("failed to parse user input due to error: " + err.Error())
}
glcm.Info("Scanning...")
cooked.commandString = copyHandlerUtil{}.ConstructCommandStringFromArgs()
err = cooked.process()
if err != nil {
glcm.Error("failed to perform benchmark command due to error: " + err.Error())
}
glcm.SurrenderControl()
},
}
rootCmd.AddCommand(benchCmd)
benchCmd.PersistentFlags().StringVar(&raw.sizePerFile, common.SizePerFileParam, "250M", "Size of each auto-generated data file. Must be "+sizeStringDescription)
benchCmd.PersistentFlags().UintVar(&raw.fileCount, common.FileCountParam, common.FileCountDefault, "Number of auto-generated data files to use")
benchCmd.PersistentFlags().UintVar(&raw.numOfFolders, "number-of-folders", 0, "If larger than 0, create folders to divide up the data.")
benchCmd.PersistentFlags().BoolVar(&raw.deleteTestData, "delete-test-data", true, "If true, then the benchmark data will be deleted at the end of the benchmark run. Set it to false if you want to keep the data at the destination - e.g. to use it for manual tests outside benchmark mode")
benchCmd.PersistentFlags().Float64Var(&raw.blockSizeMB, "block-size-mb", 0, "Use this block size (specified in MiB). The default is automatically calculated based on file size. Decimal fractions are allowed - e.g. 0.25. Identical to the same-named parameter in the copy command")
benchCmd.PersistentFlags().Float64Var(&raw.putBlobSizeMB, "put-blob-size-mb", 0, "Use this size (specified in MiB) as a threshold to determine whether to upload a blob as a single PUT request when uploading to Azure Storage. The default value is automatically calculated based on file size. Decimal fractions are allowed (For example: 0.25).")
benchCmd.PersistentFlags().StringVar(&raw.blobType, "blob-type", "Detect", "Defines the type of blob at the destination. Used to allow benchmarking different blob types. Identical to the same-named parameter in the copy command")
benchCmd.PersistentFlags().BoolVar(&raw.putMd5, "put-md5", false, "Create an MD5 hash of each file, and save the hash as the Content-MD5 property of the destination blob/file. (By default the hash is NOT created.) Identical to the same-named parameter in the copy command")
benchCmd.PersistentFlags().BoolVar(&raw.checkLength, "check-length", true, "Check the length of a file on the destination after the transfer. If there is a mismatch between source and destination, the transfer is marked as failed.")
benchCmd.PersistentFlags().StringVar(&raw.mode, "mode", "upload", "Defines if AzCopy should test uploads or downloads from this target. Valid values are 'upload' and 'download'. Defaulted option is 'upload'.")
}