def objectExistsWithSizeAndMaybeChecksum()

in media_remover/src/main/scala/S3ObjectChecker.scala [17:57]


  /**
   * Checks whether the bucket already holds a version of `objectKey` that makes the local copy redundant.
   *
   * A stored version qualifies when its key is exactly `objectKey` and its size equals `fileSize`.
   * If a local MD5 is supplied and at least one size-matching version carries an ETag accepted by
   * `eTagIsProbablyMd5`, one of those ETags must also equal the local MD5. If none of the
   * size-matching ETags look like plain MD5s, the checksum test is skipped and the key + size
   * match alone is accepted.
   *
   * NOTE(review): only the first page of the version listing is inspected; confirm that keys with
   * very large version histories do not need pagination here.
   *
   * @param objectKey     key of the object to look for in `bucketName`
   * @param fileSize      size in bytes that a stored version must have
   * @param maybeLocalMd5 optional MD5 of the local file, expected as a hex string
   * @return a Future of `true` when a satisfactory version exists, `false` when it does not (or the
   *         listing fails with `NoSuchKeyException`); the Future fails with a `RuntimeException`
   *         wrapping the cause for any other non-fatal listing error
   */
  def objectExistsWithSizeAndMaybeChecksum(objectKey: String, fileSize: Long, maybeLocalMd5: Option[String]): Future[Boolean] = {
    // S3 normally reports ETags wrapped in double quotes, and hex digests may differ in case, so both
    // sides are normalised before being compared.
    def normaliseMd5(raw: String): String =
      raw.trim.stripPrefix("\"").stripSuffix("\"").toLowerCase(java.util.Locale.ROOT)

    logger.info(s"Checking for existing versions of s3://$bucketName/$objectKey with size $fileSize and nearline checksum $maybeLocalMd5")
    val req = ListObjectVersionsRequest.builder().bucket(bucketName).prefix(objectKey).build()

    client.listObjectVersions(req).asScala.map { listObjectVersionsResponse =>
      // `prefix` matches every key that merely starts with objectKey (e.g. "a.mxf" also lists
      // "a.mxf.bak"), so restrict the listing to the exact key before drawing any conclusions.
      val versions = listObjectVersionsResponse.versions().asScala.filter(_.key() == objectKey)
      logger.info(s"s3://$bucketName/$objectKey has ${versions.length} versions")
      versions.foreach(v => logger.info(s"s3://$bucketName/$objectKey @${v.versionId()} with size ${v.size()}, ETag ${v.eTag()} and checksum algorithm ${v.checksumAlgorithmAsStrings()} (has checksum alg: ${v.hasChecksumAlgorithm})"))

      val matchesForSize = versions.filter(_.size() == fileSize)
      if (matchesForSize.isEmpty) {
        logger.info(s"Found no entries for s3://$bucketName/$objectKey with size $fileSize, do require new copy")
        false
      } else {
        maybeLocalMd5 match {
          case Some(localMd5) =>
            // Multipart uploads produce ETags that are not plain MD5s; only versions whose ETag
            // looks like an MD5 can take part in the checksum comparison.
            val matchesWithMd5 = matchesForSize.filter(m => eTagIsProbablyMd5(m.eTag))
            if (matchesWithMd5.isEmpty) {
              logger.info(s"No MD5-style ETags among ${matchesForSize.length} size-matching entries for s3://$bucketName/$objectKey, accepting key and size match")
              true
            } else {
              val wantedMd5 = normaliseMd5(localMd5)
              val checksumMatches = matchesWithMd5.exists(m => Option(m.eTag()).exists(tag => normaliseMd5(tag) == wantedMd5))
              if (checksumMatches) {
                logger.info(s"Found an entry for s3://$bucketName/$objectKey with size $fileSize and matching MD5 $localMd5")
              } else {
                logger.info(s"No entry for s3://$bucketName/$objectKey with size $fileSize matches MD5 $localMd5, do require new copy")
              }
              checksumMatches
            }
          case None =>
            logger.info(s"Found ${matchesForSize.length} existing entries for s3://$bucketName/$objectKey with size $fileSize, safe to delete locally")
            true
        }
      }
    }.recover {
      case _: NoSuchKeyException => false
      // NonFatal lets VM-level errors (OutOfMemoryError, InterruptedException, ...) propagate untouched.
      case scala.util.control.NonFatal(err) =>
        val message = s"Could not check pre-existing versions for s3://$bucketName/$objectKey: ${err.getMessage}"
        logger.error(message, err)
        // Keep the original exception as the cause so the stack trace survives the wrapping.
        throw new RuntimeException(message, err)
    }
  }