def mergeMetadata()

in scripts/src/main/scala/com/gu/mediaservice/scripts/ProposeS3Changes.scala [120:195]


  /**
    * Reconciles the metadata held in Elasticsearch with the metadata currently on the S3 object
    * and returns the metadata that should be stored on the object.
    *
    * If `metadataEquivalent` reports the two as equivalent, `s3Metadata` is returned untouched.
    * Otherwise the result keeps the S3 key and last-modified time and carries the merged
    * identifiers plus filename, uploaded-by and upload-time wherever either source has a value.
    * Elasticsearch values win over S3 values, but an S3 value that only differs by encoding
    * (or, for dates, by format) is retained as-is.
    *
    * @param esMetadata the Elasticsearch view of the image
    * @param s3Metadata the metadata currently on the S3 object
    * @param picdarData lookup used to add a picdar URN identifier for the image id, if one exists
    * @return the metadata to apply to the S3 object
    */
  def mergeMetadata(esMetadata: EsDocumentWithMetadata, s3Metadata: ObjectMetadata, picdarData: PicdarData): ObjectMetadata = {
    /* If we can we should retain the legacy keys with _ in so that we don't have to touch the object */
    def bestKeyNameFor(dashVariant: String): String = {
      val underscoreVariant = dashVariant.replace("-", "_")
      val hasDashVariant = s3Metadata.metadata.contains(dashVariant)
      val hasUnderscoreVariant = s3Metadata.metadata.contains(underscoreVariant)
      if (hasDashVariant && hasUnderscoreVariant) {
        System.err.println(s"Warning: both dash and underscore keys on ${s3Metadata.key}")
      }
      // The underscore (legacy) key wins whenever it is present
      if (hasUnderscoreVariant) underscoreVariant else dashVariant
    }

    /*
     * Find the "best" value, trying hard to detect values that haven't really changed despite encoding or
     * format changes. ES values are expected raw (not URI-encoded); the result is in stored (encoded) form:
     * either the existing S3 value or the URI-encoded ES value.
     */
    def bestValue(maybeEsValue: Option[String], maybeS3Value: Option[String], isDate: Boolean = false): Option[String] = {
      def isSame(esValue: String, s3Value: String): Boolean = {
        val decodedS3 = URI.decode(s3Value)
        if (!isDate) {
          decodedS3 == esValue
        } else {
          // Compare instants only: equals() would also compare chronology/zone, so the same moment
          // written with a different offset would wrongly count as a change.
          val sameInstant = for {
            s3Date <- scala.util.Try(DateTime.parse(decodedS3))
            esDate <- scala.util.Try(DateTime.parse(esValue))
          } yield s3Date.isEqual(esDate)
          sameInstant.getOrElse {
            // A malformed date must not abort the whole run; fall back to a plain string comparison
            System.err.println(s"Warning: unparseable date value on ${s3Metadata.key}")
            decodedS3 == esValue
          }
        }
      }
      (maybeEsValue, maybeS3Value) match {
        case (None, s3Value) => s3Value
        case (Some(esValue), Some(s3Value)) if isSame(esValue, s3Value) => Some(s3Value)
        case (Some(esValue), _) => Some(URI.encode(esValue))
      }
    }

    // Resolved before the equivalence check so the duplicate-key warning is emitted for every object
    val filenameKey = bestKeyNameFor(ImageStorageProps.filenameMetadataKey)
    val uploadedByKey = bestKeyNameFor(ImageStorageProps.uploadedByMetadataKey)
    val uploadTimeKey = bestKeyNameFor(ImageStorageProps.uploadTimeMetadataKey)

    if (metadataEquivalent(esMetadata, s3Metadata)) {
      s3Metadata
    } else {
      // filename: taken from ES if it exists, then from S3, otherwise empty.
      // Any " (<id>)" suffix is stripped. This is a literal match (not a regex, where the parentheses
      // would be a capture group). bestValue may hand back a URI-encoded value, so the encoded form of
      // the suffix is removed as well (assumes URI.encode escapes character by character).
      val idSuffix = s" (${esMetadata.id})"
      val fileName = bestValue(esMetadata.fileName, s3Metadata.metadata.get(filenameKey))
        .map(_.replace(idSuffix, "").replace(URI.encode(idSuffix), ""))
      // uploaded by: taken from ES if it exists, then from S3, otherwise empty
      val uploadedBy = bestValue(esMetadata.uploadedBy, s3Metadata.metadata.get(uploadedByKey))
      // uploaded time: taken from ES if it exists, then from S3, otherwise empty
      val uploadTime = bestValue(esMetadata.uploadTime, s3Metadata.metadata.get(uploadTimeKey), isDate = true)

      // Find ALL identifiers in elasticsearch (put "identifier!" on the front and make lowercase).
      // Values stay raw here: bestValue compares them against the decoded S3 value and does the
      // encoding itself, so encoding them up front would double-encode and defeat the comparison.
      val esIdentifiers = esMetadata.identifiers
        .map { case (key, value) => s"${ImageStorageProps.identifierMetadataKeyPrefix}$key".toLowerCase -> value }

      // Find all OUR identifiers in S3 (must have "identifier!" on the front)
      val s3Identifiers = s3Metadata.metadata
        .filter { case (key, _) => key.startsWith(ImageStorageProps.identifierMetadataKeyPrefix) }

      // The picdar URN for this image, if known; it is added as-is (not URI-encoded)
      val picdarIdEntry = picdarData.gridToPicdar.get(esMetadata.id)
        .map(urn => s"${ImageStorageProps.identifierMetadataKeyPrefix}picdarurn" -> urn)

      // Merge the two maps together with any picdar entry (the picdar entry wins on a key clash)
      val allIdentifierKeys = s3Identifiers.keySet ++ esIdentifiers.keySet
      val identifiers = allIdentifierKeys.foldLeft(Map.empty[String, String]) { (acc, key) =>
        acc ++ bestValue(esIdentifiers.get(key), s3Identifiers.get(key)).map(value => key -> value)
      } ++ picdarIdEntry

      ObjectMetadata(
        key = s3Metadata.key,
        lastModified = s3Metadata.lastModified,
        metadata = identifiers
          ++ fileName.map(fn => filenameKey -> fn)
          ++ uploadedBy.map(ub => uploadedByKey -> ub)
          ++ uploadTime.map(ut => uploadTimeKey -> ut)
      )
    }
  }