azkustoingest/internal/utils/ingestion_utils.go (87 lines of code) (raw):

// Package resources contains objects that are used to gather information about Kusto resources that are // used during various ingestion methods. package utils import ( "context" "fmt" "net/http" "path" "path/filepath" "strings" "github.com/Azure/azure-sdk-for-go/sdk/azcore" "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob" "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/blob" "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/service" "github.com/Azure/azure-kusto-go/azkustoingest/ingestoptions" "github.com/Azure/azure-kusto-go/azkustoingest/internal/resources" ) const EstimatedCompressionFactor = 11 func FetchBlobSize(fPath string, ctx context.Context, client *http.Client) (size int64, err error) { if !strings.Contains(fPath, ".blob.") || strings.Contains(strings.ToLower(fPath), "managed_identity=") || strings.Contains(strings.ToLower(fPath), "token=") { return 0, nil } parsed, err := resources.Parse(fPath) if err != nil { return 0, err } var blobClient *azblob.Client = nil var objectNameSplit []string if len(parsed.SAS()) == 0 { objectParts := strings.Split(parsed.ObjectName(), ";") if len(objectParts) == 2 { cred, err := service.NewSharedKeyCredential(parsed.Account(), objectParts[1]) if err != nil { return 0, err } serviceUrl := fmt.Sprintf("%s://%s", parsed.URL().Scheme, parsed.URL().Host) blobClient, err = azblob.NewClientWithSharedKeyCredential(serviceUrl, cred, &azblob.ClientOptions{ ClientOptions: azcore.ClientOptions{ Transport: client, }, }) if err != nil { return 0, err } objectNameSplit = strings.SplitN(objectParts[0], "/", 2) } } if blobClient == nil { serviceUrl := fmt.Sprintf("%s://%s?%s", parsed.URL().Scheme, parsed.URL().Host, parsed.SAS().Encode()) blobClient, err = azblob.NewClientWithNoCredential(serviceUrl, &azblob.ClientOptions{ ClientOptions: azcore.ClientOptions{ Transport: client, }, }) if err != nil { return 0, err } objectNameSplit = strings.SplitN(parsed.ObjectName(), "/", 2) } blobCli := blobClient.ServiceClient().NewContainerClient(objectNameSplit[0]).NewBlobClient(objectNameSplit[1]) properties, err := blobCli.GetProperties(ctx, &blob.GetPropertiesOptions{}) if err != nil { return 0, err } return *properties.ContentLength, nil } func EstimateRawDataSize(compression ingestoptions.CompressionType, fileSize int64) int64 { switch compression { case ingestoptions.GZIP: case ingestoptions.ZIP: return fileSize * EstimatedCompressionFactor } return fileSize } // CompressionDiscovery looks at the file extension. If it is one we support, we return that // CompressionType that represents that value. Otherwise we return CTNone to indicate that the // file should not be compressed. func CompressionDiscovery(fName string) ingestoptions.CompressionType { var ext string if strings.HasPrefix(strings.ToLower(fName), "http") { ext = strings.ToLower(filepath.Ext(path.Base(fName))) } else { ext = strings.ToLower(filepath.Ext(fName)) } switch ext { case ".gz": return ingestoptions.GZIP case ".zip": return ingestoptions.ZIP } return ingestoptions.CTNone }