in fbpcf/aws/S3Util.cpp [36:81]
// Parses an S3 object URI into its region, bucket and key components.
//
// Supported formats:
//   1. https://bucket.s3.region.amazonaws.com/key
//   2. https://bucket.s3-region.amazonaws.com/key
//   3. s3://bucket/key   (region is taken from the AWS_DEFAULT_REGION
//                         environment variable)
//
// @param url  the URI to parse.
// @return an S3ObjectReference built from {region, bucket, key}, where key is
//         the URI path without its leading '/'.
// @throws AwsException if the s3:// form is used while AWS_DEFAULT_REGION is
//         unset, if the bucket is empty, if the host does not match one of
//         the supported endpoint formats, or if no key is present.
// NOTE(review): folly::Uri's constructor may itself throw on a malformed URL;
// that exception is not translated here - confirm callers expect this.
S3ObjectReference uriToObjectReference(std::string url) {
  std::string bucket;
  std::string region;
  auto uri = folly::Uri(url);
  auto scheme = uri.scheme();
  auto host = uri.host();
  auto path = uri.path();

  if (boost::iequals(scheme, "s3")) {
    // Read the environment once so the null check and the value we use are
    // guaranteed to refer to the same lookup.
    const char* defaultRegion = std::getenv("AWS_DEFAULT_REGION");
    if (defaultRegion == nullptr) {
      throw AwsException{"AWS_DEFAULT_REGION not specified"};
    }
    region = defaultRegion;
    bucket = host;
    // "s3:///key" parses with an empty host; reject it rather than returning
    // a reference with an empty bucket name.
    if (bucket.empty()) {
      throw AwsException{folly::sformat(
          "Incorrect S3 URI format: {}\n"
          "bucket not specified",
          url)};
    }
  } else {
    // A stricter version of:
    // https://github.com/aws/aws-sdk-java/blob/c2c377058380cca07c0be9c8c6e0d7bf0b3777b8/aws-java-sdk-s3/src/main/java/com/amazonaws/services/s3/AmazonS3URI.java#L29
    //
    // Matches "bucket.s3.region.amazonaws.com" or
    // "bucket.s3-region.amazonaws.com"
    //
    // Every literal dot is escaped so that e.g. "amazonawsXcom" is rejected.
    static const re2::RE2 endpoint_pattern(
        "^(?i)(.+)\\.s3[.-]([a-z0-9-]+)\\.amazonaws\\.com");
    // Sub-match 1: (bucket).s3.region.amazonaws.com
    // Sub-match 2: bucket.s3.(region).amazonaws.com
    if (!re2::RE2::FullMatch(host, endpoint_pattern, &bucket, &region)) {
      throw AwsException{folly::sformat(
          "Incorrect S3 URI format: {}\n"
          "Supported formats:\n"
          "1. https://bucket.s3.region.amazonaws.com/key\n"
          "2. https://bucket.s3-region.amazonaws.com/key\n"
          "3. s3://bucket/key",
          url)};
    }
  }

  // A path of "" or "/" carries no object key.
  if (path.length() <= 1) {
    throw AwsException{folly::sformat(
        "Incorrect S3 URI format: {}\n"
        "key not specified",
        url)};
  }

  // path.substr(1) to remove the first character '/'
  return S3ObjectReference{region, bucket, path.substr(1)};
}