app/collectors/bucket.scala (126 lines of code) (raw):
package collectors
import java.time.Instant
import agent._
import conf.AWS
import controllers.routes
import play.api.mvc.Call
import software.amazon.awssdk.regions.Region
import software.amazon.awssdk.services.s3.model.{
GetBucketLocationRequest,
ListBucketsRequest,
S3Exception,
Bucket => AWSBucket
}
import software.amazon.awssdk.services.s3.{S3Client, S3Configuration}
import utils.Logging
import scala.jdk.CollectionConverters._
import scala.language.{postfixOps, reflectiveCalls}
import scala.util.control.NonFatal
/** Collector set for the `bucket` resource type.
  *
  * Registered with the parent [[CollectorSet]] using the `Global` marker and
  * able to build a collector for Amazon origins only.
  *
  * @param accounts the accounts handed through to the parent collector set
  */
class BucketCollectorSet(accounts: Accounts)
    extends CollectorSet[Bucket](ResourceType("bucket"), accounts, Some(Global)) {

  /** Builds the bucket collector for an origin; defined only for [[AmazonOrigin]]. */
  val lookupCollector: PartialFunction[Origin, Collector[Bucket]] = {
    case awsOrigin: AmazonOrigin =>
      AWSBucketCollector(
        origin = awsOrigin,
        resource = resource,
        crawlRate = awsOrigin.crawlRate(resource.name)
      )
  }
}
/** Collects the S3 buckets visible to a single Amazon origin.
  *
  * Two S3 clients are used: one in EU-WEST-1 to enumerate the buckets and look
  * up their regions, and one in US-EAST-1 that is only consulted for the
  * bucket creation dates.
  *
  * @param origin    the Amazon origin whose credentials are used for every call
  * @param resource  the resource type this collector serves
  * @param crawlRate the crawl rate configured for this resource
  */
case class AWSBucketCollector(
    origin: AmazonOrigin,
    resource: ResourceType,
    crawlRate: CrawlRate
) extends Collector[Bucket]
    with Logging {

  // The `useArnRegionEnabled` flag enables us to receive data on buckets in all available AWS regions
  // https://stackoverflow.com/questions/46769493/how-enable-force-global-bucket-access-in-aws-s3-sdk-java-2-0
  val s3Configuration = S3Configuration.builder.useArnRegionEnabled(true).build

  // The region of the S3 Client is hardcoded to EU-WEST-1, because AWS-Global and US-EAST-1 do not return all buckets.
  // We decided to hardcode this here, instead of creating another enum for simplicity
  val client = S3Client.builder
    .credentialsProvider(origin.credentials.provider)
    .region(Region.EU_WEST_1)
    .overrideConfiguration(AWS.clientConfig)
    .serviceConfiguration(s3Configuration)
    .build

  // This second S3 client, with a region of US-EAST-1, gives us the correct createdTime value unlike the other regions,
  // as documented here: https://stackoverflow.com/questions/54353373/getting-incorrect-creation-dates-using-aws-s3
  // NOTE(review): unlike `client`, this one is built without `AWS.clientConfig` — confirm that is intentional.
  val clientForCorrectCreatedTime = S3Client.builder
    .credentialsProvider(origin.credentials.provider)
    .region(Region.US_EAST_1)
    .build

  /** Lists the buckets through both clients and builds one [[Bucket]] per
    * bucket returned by the EU-WEST-1 client.
    *
    * The US-EAST-1 listing is joined to the EU-WEST-1 listing by bucket name
    * rather than by position: the two responses are not guaranteed to have the
    * same length or order (see the note on `client` above), and pairing them
    * positionally would attach the wrong creation date to a bucket or silently
    * drop buckets. A bucket missing from the US-EAST-1 listing keeps the
    * creation date reported by EU-WEST-1.
    *
    * @return the buckets found for this origin
    */
  def crawl: Iterable[Bucket] = {
    val request = ListBucketsRequest.builder.build

    val listBuckets = client.listBuckets(request).buckets().asScala.toList
    log.info(
      s"Total number of buckets with S3 Client region EU-WEST-1 for account ${origin.account} ${listBuckets.length}"
    )

    val listBucketsForCorrectCreatedTime =
      clientForCorrectCreatedTime.listBuckets(request).buckets.asScala.toList
    log.info(
      s"Total number of buckets with S3 Client region US-EAST-1 for account ${origin.account} ${listBucketsForCorrectCreatedTime.length}"
    )

    // Index the US-EAST-1 view by name so each bucket is matched with itself.
    val correctCreatedTimeByName: Map[String, AWSBucket] =
      listBucketsForCorrectCreatedTime.map(b => b.name -> b).toMap

    listBuckets.map { bucket =>
      val bucketWithCorrectCreatedTime =
        correctCreatedTimeByName.getOrElse(
          bucket.name, {
            log.warn(
              s"Bucket ${bucket.name} in account ${origin.account} is missing from the US-EAST-1 listing; using the EU-WEST-1 creation date"
            )
            bucket
          }
        )
      Bucket.fromApiData(bucket, client, origin, bucketWithCorrectCreatedTime)
    }
  }
}
/** Factory for [[Bucket]] values built from S3 API responses. */
object Bucket extends Logging {

  /*
    Reaching this error code means that the bucket exists, but the user does not have access to it.
    For example, the bucket's policy might be set to only allow s3:* access from a specific IP address.
   */
  private val AccessDeniedCode = "AccessDenied"

  /** Error codes for which the region lookup is abandoned (region = None) instead of failing the crawl. */
  private val ToleratedErrorCodes: Set[String] =
    Set("NoSuchBucket", "AuthorizationHeaderMalformed", AccessDeniedCode)

  private def arn(bucketName: String) = s"arn:aws:s3:::$bucketName"

  /** Extracts the AWS error code from an exception, tolerating missing error details. */
  private def errorCodeOf(e: S3Exception): Option[String] =
    Option(e.awsErrorDetails).flatMap(details => Option(details.errorCode))

  /** Builds a [[Bucket]] from an S3 bucket listing entry, looking up the
    * bucket's region with `getBucketLocation`.
    *
    * A null or empty location constraint is recorded as `us-east-1`. When the
    * lookup fails with `NoSuchBucket`, `AuthorizationHeaderMalformed` or
    * `AccessDenied`, the failure is logged and the region is left as `None`.
    *
    * @param bucket                       the bucket entry supplying the name
    * @param client                       the S3 client used for the location lookup
    * @param origin                       the origin being crawled (used in log messages)
    * @param bucketWithCorrectCreatedTime the entry whose creation date is recorded
    * @return the bucket description
    * @throws IllegalStateException if the location lookup fails for any other non-fatal reason
    */
  def fromApiData(
      bucket: AWSBucket,
      client: S3Client,
      origin: AmazonOrigin,
      bucketWithCorrectCreatedTime: AWSBucket
  ): Bucket = {
    val bucketName = bucket.name
    val bucketRegion: Option[String] =
      try {
        Option(
          client
            .getBucketLocation(
              GetBucketLocationRequest.builder.bucket(bucketName).build
            )
            .locationConstraintAsString
        )
          .filterNot(region => "" == region)
          .orElse(Some(Region.US_EAST_1.id))
      } catch {
        case e: S3Exception =>
          // The error details are read null-safely: an exception without them
          // must end up wrapped below, not replaced by a NullPointerException.
          errorCodeOf(e).filter(ToleratedErrorCodes.contains) match {
            case Some(code) =>
              val message = s"$code for $bucketName in account ${origin.account}"
              if (code == AccessDeniedCode) log.warn(message, e)
              else log.info(message, e)
              None
            case None =>
              throw new IllegalStateException(
                s"Failed when building info for bucket $bucketName",
                e
              )
          }
        case NonFatal(t) =>
          throw new IllegalStateException(
            s"Failed when building info for bucket $bucketName",
            t
          )
      }
    Bucket(
      arn = arn(bucketName),
      name = bucketName,
      region = bucketRegion,
      createdTime = bucketWithCorrectCreatedTime.creationDate
    )
  }
}
/** An S3 bucket as recorded by the bucket collector.
  *
  * @param arn         the bucket's ARN
  * @param name        the bucket name
  * @param region      the bucket's region, or `None` when it could not be determined
  * @param createdTime the bucket's creation time
  */
case class Bucket(
    arn: String,
    name: String,
    region: Option[String],
    createdTime: Instant
) extends IndexedItem {

  /** Maps an ARN to the API route that serves the corresponding bucket. */
  override def callFromArn: (String) => Call =
    bucketArn => routes.Api.bucket(bucketArn)
}