app/collectors/bucket.scala (126 lines of code) (raw):

package collectors

import java.time.Instant

import agent._
import conf.AWS
import controllers.routes
import play.api.mvc.Call
import software.amazon.awssdk.regions.Region
import software.amazon.awssdk.services.s3.model.{
  GetBucketLocationRequest,
  ListBucketsRequest,
  S3Exception,
  Bucket => AWSBucket
}
import software.amazon.awssdk.services.s3.{S3Client, S3Configuration}
import utils.Logging

import scala.jdk.CollectionConverters._
import scala.language.{postfixOps, reflectiveCalls}
import scala.util.control.NonFatal

/** Collector set for the "bucket" resource type.
  *
  * Produces an [[AWSBucketCollector]] for every [[AmazonOrigin]]; other
  * origins are not handled by `lookupCollector`.
  */
class BucketCollectorSet(accounts: Accounts)
    extends CollectorSet[Bucket](
      ResourceType("bucket"),
      accounts,
      Some(Global)
    ) {
  val lookupCollector: PartialFunction[Origin, Collector[Bucket]] = {
    case amazon: AmazonOrigin =>
      AWSBucketCollector(amazon, resource, amazon.crawlRate(resource.name))
  }
}

/** Crawls the S3 buckets visible to the credentials of `origin`.
  *
  * Two S3 clients are used: one in EU-WEST-1 to enumerate the buckets and
  * resolve their regions, and one in US-EAST-1 whose listing supplies the
  * creation time of each bucket.
  *
  * @param origin    the AWS origin whose credentials are used for both clients
  * @param resource  the resource type this collector is registered for
  * @param crawlRate the crawl rate configured for this resource
  */
case class AWSBucketCollector(
    origin: AmazonOrigin,
    resource: ResourceType,
    crawlRate: CrawlRate
) extends Collector[Bucket]
    with Logging {

  // The `useArnRegionEnabled` flag enables us to receive data on buckets in all available AWS regions
  // https://stackoverflow.com/questions/46769493/how-enable-force-global-bucket-access-in-aws-s3-sdk-java-2-0
  val s3Configuration = S3Configuration.builder.useArnRegionEnabled(true).build

  // The region of the S3 Client is hardcoded to EU-WEST-1, because AWS-Global and US-EAST-1 do not return all buckets.
  // We decided to hardcode this here, instead of creating another enum for simplicity
  val client = S3Client.builder
    .credentialsProvider(origin.credentials.provider)
    .region(Region.EU_WEST_1)
    .overrideConfiguration(AWS.clientConfig)
    .serviceConfiguration(s3Configuration)
    .build

  // This second S3 client, with a region of US-EAST-1, gives us the correct createdTime value unlike the other regions,
  // as documented here: https://stackoverflow.com/questions/54353373/getting-incorrect-creation-dates-using-aws-s3
  val clientForCorrectCreatedTime = S3Client.builder
    .credentialsProvider(origin.credentials.provider)
    .region(Region.US_EAST_1)
    .build

  /** Lists the buckets through both clients and builds one [[Bucket]] per
    * bucket returned by the EU-WEST-1 client.
    *
    * The creation time is taken from the US-EAST-1 entry with the same bucket
    * name. The two listings are independent API responses, so they are matched
    * by name rather than by position: pairing them positionally would drop
    * buckets and/or attach another bucket's creation time whenever the
    * listings differ in length or order. When a bucket has no US-EAST-1 entry,
    * its own EU-WEST-1 entry is used for the creation time instead.
    *
    * @return one [[Bucket]] for every bucket in the EU-WEST-1 listing
    * @throws IllegalStateException propagated from [[Bucket.fromApiData]]
    */
  def crawl: Iterable[Bucket] = {
    val request = ListBucketsRequest.builder.build

    val listBuckets = client.listBuckets(request).buckets().asScala.toList
    log.info(
      s"Total number of buckets with S3 Client region EU-WEST-1 for account ${origin.account} ${listBuckets.length}"
    )

    val listBucketsForCorrectCreatedTime =
      clientForCorrectCreatedTime.listBuckets(request).buckets.asScala.toList
    log.info(
      s"Total number of buckets with S3 Client region US-EAST-1 for account ${origin.account} ${listBucketsForCorrectCreatedTime.length}"
    )

    // Index the US-EAST-1 listing by bucket name so each bucket is paired with
    // its own entry regardless of the order or size of the two responses.
    val correctCreatedTimeByName: Map[String, AWSBucket] =
      listBucketsForCorrectCreatedTime
        .map(awsBucket => awsBucket.name -> awsBucket)
        .toMap

    listBuckets.map { bucket =>
      val bucketWithCorrectCreatedTime =
        correctCreatedTimeByName.getOrElse(
          bucket.name, {
            log.info(
              s"Bucket ${bucket.name} in account ${origin.account} is missing from the US-EAST-1 listing; using the EU-WEST-1 creation date"
            )
            bucket
          }
        )
      Bucket.fromApiData(
        bucket,
        client,
        origin,
        bucketWithCorrectCreatedTime
      )
    }
  }
}

object Bucket extends Logging {

  /** Builds the S3 ARN for a bucket name. */
  private def arn(bucketName: String) = s"arn:aws:s3:::$bucketName"

  /** Builds a [[Bucket]] from the AWS listing entries for a single bucket.
    *
    * The region is resolved with a `GetBucketLocation` call on `client`. A
    * null or empty location constraint is mapped to the id of
    * `Region.US_EAST_1`. If the call fails with an `S3Exception` whose error
    * code is `NoSuchBucket`, `AuthorizationHeaderMalformed` or `AccessDenied`,
    * the failure is logged and the region is left as `None`.
    *
    * @param bucket                       the listing entry that supplies the bucket name
    * @param client                       the S3 client used for the location lookup
    * @param origin                       the origin being crawled; its account is used in log messages
    * @param bucketWithCorrectCreatedTime the listing entry that supplies the creation date
    * @return the assembled [[Bucket]]
    * @throws IllegalStateException if the location lookup fails with any other
    *                               non-fatal error
    */
  def fromApiData(
      bucket: AWSBucket,
      client: S3Client,
      origin: AmazonOrigin,
      bucketWithCorrectCreatedTime: AWSBucket
  ): Bucket = {
    val bucketName = bucket.name
    val bucketRegion =
      try {
        Option(
          client
            .getBucketLocation(
              GetBucketLocationRequest.builder.bucket(bucketName).build
            )
            .locationConstraintAsString
        )
          .filterNot(region => "" == region)
          .orElse(Some(Region.US_EAST_1.id))
      } catch {
        case e: S3Exception
            if e.awsErrorDetails.errorCode == "NoSuchBucket" =>
          log.info(
            s"NoSuchBucket for $bucketName in account ${origin.account}",
            e
          )
          None
        case e: S3Exception
            if e.awsErrorDetails.errorCode == "AuthorizationHeaderMalformed" =>
          log.info(
            s"AuthorizationHeaderMalformed for $bucketName in account ${origin.account}",
            e
          )
          None
        /* Reaching this case means that the bucket exists, but the user does not have access to it.
           For example, the bucket's policy might be set to only allow s3:* access from a specific IP address.
         */
        case e: S3Exception
            if e.awsErrorDetails().errorCode == "AccessDenied" =>
          log.warn(
            s"AccessDenied for $bucketName in account ${origin.account}",
            e
          )
          None
        case NonFatal(t) =>
          throw new IllegalStateException(
            s"Failed when building info for bucket $bucketName",
            t
          )
      }
    Bucket(
      arn = arn(bucketName),
      name = bucketName,
      region = bucketRegion,
      createdTime = bucketWithCorrectCreatedTime.creationDate
    )
  }
}

/** An S3 bucket as indexed by the collectors.
  *
  * @param arn         the bucket ARN (`arn:aws:s3:::<name>`)
  * @param name        the bucket name
  * @param region      the bucket region id, or `None` when it could not be resolved
  * @param createdTime the creation date reported by the S3 listing
  */
case class Bucket(
    arn: String,
    name: String,
    region: Option[String],
    createdTime: Instant
) extends IndexedItem {
  override def callFromArn: (String) => Call = arn => routes.Api.bucket(arn)
}