agent/s3util/s3uri.go (105 lines of code) (raw):
// Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"). You may not
// use this file except in compliance with the License. A copy of the
// License is located at
//
// http://aws.amazon.com/apache2.0/
//
// or in the "license" file accompanying this file. This file is distributed
// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing
// permissions and limitations under the License.
// Package s3util contains utilities for working with S3
package s3util
import (
"errors"
"fmt"
"net/url"
"regexp"
"strings"
"github.com/aws/amazon-ssm-agent/agent/log"
)
const (
	// vpceUrlPattern matches S3 host names that go through a VPC interface
	// endpoint. Capture groups of interest: bucket name (optional) and
	// region (optional, absent for us-east-1).
	vpceUrlPattern = `^((.+)\.)?` + // optional bucket name label(s)
		`(bucket|accesspoint|control)\.vpce-[-a-z0-9]+\.` + // VPC endpoint DNS name
		`s3[.-]` + // S3 service name
		`(([-a-z0-9]+)\.)?` + // optional region name
		`vpce\.` +
		`(amazonaws\.com|amazonaws\.com\.cn|c2s\.ic\.gov|sc2s\.sgov\.gov|cloud\.adc-e\.uk|csp\.hci\.ic\.gov)`
	// Submatch indices of the bucket and region groups in vpceUrlPattern.
	vpceUrlPatternBucketIdx = 2
	vpceUrlPatternRegionIdx = 5

	// nonVpceUrlPattern matches public S3 host names, including the website,
	// transfer acceleration and dual-stack variants.
	nonVpceUrlPattern = `^((.+)\.)?` + // optional bucket name label(s)
		`s3[.-](website[-.])?(accelerate\.)?(dualstack[-.])?` + // S3 service name with optional features
		`(([-a-z0-9]+)\.)?` + // optional region name
		`(amazonaws\.com|amazonaws\.com\.cn|c2s\.ic\.gov|sc2s\.sgov\.gov|cloud\.adc-e\.uk|csp\.hci\.ic\.gov)`
	// Submatch indices of the bucket and region groups in nonVpceUrlPattern.
	nonVpceUrlPatternBucketIdx = 2
	nonVpceUrlPatternRegionIdx = 7

	// ChinaRegionPrefix is the prefix shared by China region names.
	ChinaRegionPrefix = "cn-"
)

// Compiled once at package initialization; MustCompile panics if a pattern is
// malformed.
var (
	vpceUrlRegex    = regexp.MustCompile(vpceUrlPattern)
	nonVpceUrlRegex = regexp.MustCompile(nonVpceUrlPattern)
)
// AmazonS3URL holds the interesting pieces extracted from an S3 URL.
type AmazonS3URL struct {
	// IsValidS3URI reports whether the URL host was recognized as an S3 endpoint.
	IsValidS3URI bool
	// IsPathStyle reports whether the bucket name came from the URL path
	// rather than from the host name.
	IsPathStyle bool
	// Bucket is the bucket name; it may be empty.
	Bucket string
	// Key is the object key; it may be empty.
	Key string
	// Region is the region name associated with the URL.
	Region string
}

// IsBucketAndKeyPresent reports whether the AmazonS3URL is a valid S3 URI
// whose bucket, key and region are all non-empty.
func (output AmazonS3URL) IsBucketAndKeyPresent() bool {
	anyMissing := output.Bucket == "" || output.Key == "" || output.Region == ""
	return output.IsValidS3URI && !anyMissing
}
// ParseAmazonS3URL parses an HTTP/HTTPS URL for an S3 resource and returns an
// AmazonS3URL object.
//
// S3 URLs come in two flavors: virtual hosted-style URLs and path-style URLs.
// Virtual hosted-style URLs have the bucket name as the first component of the
// hostname, e.g.
//
//	https://mybucket.s3.us-east-1.amazonaws.com/a/b/c
//
// Path-style URLs have the bucket name as the first component of the path, e.g.
//
//	https://s3.us-east-1.amazonaws.com/mybucket/a/b/c
//
// S3 supports a few features that affect how the URL is formed:
//   - Website endpoints - "s3.$REGION" becomes "s3-website[-.]$REGION"
//   - Transfer acceleration - "s3" becomes "s3-accelerate"
//   - Dual-stack (IPv4/IPv6) - "s3" becomes "s3.dualstack"
//   - Can be used with acceleration - "s3-accelerate.dualstack"
//   - VPC endpoints - "s3.$REGION.amazonaws.com" becomes
//     "bucket.$VPC_ENDPOINT_ID.s3.$REGION.vpce.amazonaws.com"
//
// Parameters:
//   - log: not used by this function.
//   - s3URL: the URL to inspect; only its Host and Path are read. A nil URL
//     yields the zero AmazonS3URL.
//
// The zero AmazonS3URL (IsValidS3URI false) is returned when the host matches
// neither the VPC endpoint pattern nor the public endpoint pattern. Otherwise
// IsValidS3URI is true and Bucket, Key, Region and IsPathStyle are filled in;
// Region is "us-east-1" when the host carries no region or names "external-1".
//
// References:
// https://docs.aws.amazon.com/AmazonS3/latest/userguide/VirtualHosting.html
// https://docs.aws.amazon.com/AmazonS3/latest/userguide/WebsiteEndpoints.html
// https://docs.aws.amazon.com/AmazonS3/latest/userguide/transfer-acceleration-getting-started.html
// https://docs.aws.amazon.com/AmazonS3/latest/userguide/dual-stack-endpoints.html
// https://docs.aws.amazon.com/AmazonS3/latest/userguide/privatelink-interface-endpoints.html
func ParseAmazonS3URL(log log.T, s3URL *url.URL) (output AmazonS3URL) {
	// A nil URL cannot describe an S3 resource; report it as invalid instead
	// of panicking on the field access below.
	if s3URL == nil {
		return AmazonS3URL{}
	}

	// Try the VPC endpoint pattern first, then fall back to the public one.
	parsed, err := parseBucketAndRegionFromHost(s3URL.Host, vpceUrlRegex, vpceUrlPatternBucketIdx, vpceUrlPatternRegionIdx)
	if err != nil {
		parsed, err = parseBucketAndRegionFromHost(s3URL.Host, nonVpceUrlRegex, nonVpceUrlPatternBucketIdx, nonVpceUrlPatternRegionIdx)
		if err != nil {
			return AmazonS3URL{}
		}
	}
	output = parsed

	// No bucket in the host name means the bucket is the first path segment.
	output.IsPathStyle = output.Bucket == ""

	// Path is the decoded path. Drop a single leading "/" if there is one;
	// TrimPrefix leaves a path without a leading "/" intact.
	path := strings.TrimPrefix(s3URL.Path, "/")

	if output.IsPathStyle {
		if idx := strings.Index(path, "/"); idx == -1 {
			// https://s3.amazonaws.com/bucket (or an empty path)
			output.Bucket = path
		} else {
			// https://s3.amazonaws.com/bucket/ and
			// https://s3.amazonaws.com/bucket/key
			output.Bucket = path[:idx]
			output.Key = path[idx+1:]
		}
	} else {
		// bucket name in the host, the whole path is the object key
		output.Key = path
	}

	// An S3 URL for us-east-1 may omit the region, and "external-1" is
	// mapped to us-east-1 as well.
	if output.Region == "" || strings.EqualFold(output.Region, "external-1") {
		output.Region = "us-east-1"
	}
	return output
}
// parseBucketAndRegionFromHost matches host against re and, on success,
// returns an AmazonS3URL with IsValidS3URI set and Bucket and Region taken
// from the submatches at bucketIdx and regionIdx. It returns an error when
// host does not match or the match has too few submatches for either index.
func parseBucketAndRegionFromHost(host string, re *regexp.Regexp, bucketIdx, regionIdx int) (AmazonS3URL, error) {
	groups := re.FindStringSubmatch(host)
	if groups == nil || len(groups) <= bucketIdx || len(groups) <= regionIdx {
		return AmazonS3URL{}, errors.New("no match")
	}

	parsed := AmazonS3URL{IsValidS3URI: true}
	parsed.Bucket = groups[bucketIdx]
	parsed.Region = groups[regionIdx]
	return parsed, nil
}
// String renders the AmazonS3URL fields as a single brace-delimited line.
func (output AmazonS3URL) String() string {
	const layout = "{Region: %s; Bucket: %s; Key: %s; IsValidS3URI: %v; IsPathStyle: %v}"
	return fmt.Sprintf(
		layout,
		output.Region,
		output.Bucket,
		output.Key,
		output.IsValidS3URI,
		output.IsPathStyle,
	)
}