S3ObjectReference uriToObjectReference()

in fbpcf/aws/S3Util.cpp [36:81]


/**
 * Split an S3 URI into its region, bucket and key.
 *
 * Recognized forms:
 *   1. <scheme>://bucket.s3.region.amazonaws.com/key
 *   2. <scheme>://bucket.s3-region.amazonaws.com/key
 *   3. s3://bucket/key   (region comes from the AWS_DEFAULT_REGION env var)
 *
 * Note that for forms 1 and 2 only the host is validated; the scheme is not
 * checked beyond "is not s3".
 *
 * @param url  The URI to parse.
 * @return     S3ObjectReference initialized with {region, bucket, key}, where
 *             key is the URI path with its leading '/' removed.
 * @throws AwsException if the scheme is "s3" and AWS_DEFAULT_REGION is unset,
 *         if the host of a non-s3 URI is not a recognized S3 endpoint, or if
 *         the path is empty or just "/".
 *         NOTE(review): folly::Uri's constructor presumably throws its own
 *         exception type on unparsable input -- confirm against folly docs.
 */
S3ObjectReference uriToObjectReference(std::string url) {
  std::string bucket;
  std::string region;
  const auto uri = folly::Uri(url);
  // `uri` outlives these bindings, so no copies are needed.
  const auto& scheme = uri.scheme();
  const auto& host = uri.host();
  const auto& path = uri.path();

  if (boost::iequals(scheme, "s3")) {
    // s3://bucket/key carries no region, so it must come from the environment.
    // Look the variable up once and reuse the pointer.
    const char* defaultRegion = std::getenv("AWS_DEFAULT_REGION");
    if (defaultRegion == nullptr) {
      throw AwsException{"AWS_DEFAULT_REGION not specified"};
    }
    region = defaultRegion;
    bucket = host;
  } else {
    // A stricter version of:
    // https://github.com/aws/aws-sdk-java/blob/c2c377058380cca07c0be9c8c6e0d7bf0b3777b8/aws-java-sdk-s3/src/main/java/com/amazonaws/services/s3/AmazonS3URI.java#L29
    //
    // Matches "bucket.s3.region.amazonaws.com" or
    // "bucket.s3-region.amazonaws.com" (case-insensitively, via "(?i)").
    // Every literal dot is escaped so that '.' cannot match an arbitrary
    // character (e.g. "amazonawsXcom" must be rejected).
    static const re2::RE2 endpoint_pattern(
        "^(?i)(.+)\\.s3[.-]([a-z0-9-]+)\\.amazonaws\\.com");

    // Sub-match 1: (bucket).s3.region.amazonaws.com
    // Sub-match 2: bucket.s3.(region).amazonaws.com
    if (!re2::RE2::FullMatch(host, endpoint_pattern, &bucket, &region)) {
      throw AwsException{folly::sformat(
          "Incorrect S3 URI format: {}\n"
          "Supported formats:\n"
          "1. https://bucket.s3.region.amazonaws.com/key\n"
          "2. https://bucket.s3-region.amazonaws.com/key\n"
          "3. s3://bucket/key",
          url)};
    }
  }

  // An empty path or a bare "/" means no object key was given.
  if (path.length() <= 1) {
    throw AwsException{folly::sformat(
        "Incorrect S3 URI format: {} (key not specified)", url)};
  }

  // path.substr(1) to remove the first character '/'
  return S3ObjectReference{region, bucket, path.substr(1)};
}