in sagemaker-spark-sdk/src/main/scala/com/amazonaws/services/sagemaker/sparksdk/SageMakerEstimator.scala [239:276]
private[sparksdk] def resolveS3Path(s3Resource: S3Resource,
                                    trainingJobName: String,
                                    config: SparkConf): S3DataPath = {
  s3Resource match {
    case s3DataPath: S3DataPath =>
      // An explicit bucket and prefix: append the training job name to the prefix.
      new S3DataPath(s3DataPath.bucket, s3DataPath.objectPath + "/" + trainingJobName)
    case S3PathFromConfig(configKey) =>
      val configValue = config.get(configKey)
      // "[an]?" accepts the s3://, s3a://, and s3n:// URI schemes.
      if (configValue.matches("^s3[an]?://.+")) {
        // The config value is a full S3 URI: append the training job name.
        val s3URI = configValue.stripSuffix("/") + "/" + trainingJobName
        S3DataPath.fromS3URI(s3URI)
      } else {
        // The config value is a bare bucket name: generate a unique prefix.
        val prefix = UUID.randomUUID().toString + "/" + trainingJobName
        S3DataPath(configValue, prefix)
      }
    case S3AutoCreatePath() =>
      // No bucket given: create (or reuse) a default bucket named after the
      // caller's account and region.
      val account = stsClient.getCallerIdentity(new GetCallerIdentityRequest).getAccount
      val region = s3Client.getRegionName
      val bucketName = s"$account-sagemaker-$region"
      try {
        s3Client.createBucket(bucketName)
        log.info(s"Created bucket $bucketName.")
      } catch {
        case ex: AmazonS3Exception =>
          val errorCode = Option(ex.getErrorCode).getOrElse("")
          if (errorCode.contains("BucketAlreadyOwnedByYou")) {
            // Outside us-east-1, re-creating a bucket you already own fails with
            // this error code; the bucket is usable as-is.
            log.info(s"Using bucket $bucketName, which you already own.")
          } else if (errorCode.contains("AuthorizationHeaderMalformed")) {
            // This error is returned when the S3 client is in us-east-1 but the
            // bucket is not.
            log.info(s"Bucket $bucketName already exists in a different region, " +
              s"not $region. Attempting to use bucket $bucketName.")
          } else {
            throw ex
          }
      }
      val prefix = UUID.randomUUID().toString + "/" + trainingJobName
      S3DataPath(bucketName, prefix)
  }
}
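
For context, here is a minimal sketch of how each S3Resource variant resolves through this method. It assumes a training job named "job-123"; the SparkConf key is hypothetical, and the S3DataPath, S3PathFromConfig, and S3AutoCreatePath types come from the same sparksdk package.

// Illustrative sketch, not part of the source file.
import com.amazonaws.services.sagemaker.sparksdk.{S3AutoCreatePath, S3DataPath, S3PathFromConfig, S3Resource}

// Explicit bucket and prefix: resolves to s3://my-bucket/my-prefix/job-123.
val explicit: S3Resource = S3DataPath("my-bucket", "my-prefix")

// Bucket or full URI read from Spark configuration (the key below is a made-up
// example). A value like "s3a://my-bucket/data" resolves to
// s3a://my-bucket/data/job-123; a bare bucket name gets a random UUID prefix,
// e.g. s3://my-bucket/<uuid>/job-123.
val fromConfig: S3Resource = S3PathFromConfig("spark.sagemaker.s3.bucket")

// No bucket given: a bucket named "<account>-sagemaker-<region>" is created
// (or reused), with a "<uuid>/job-123" prefix.
val auto: S3Resource = S3AutoCreatePath()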