in scripts/src/main/scala/com/gu/mediaservice/scripts/ProposeS3Changes.scala [120:195]
/**
  * Builds the S3 object metadata that should be stored for an image by combining what
  * Elasticsearch holds with what is currently on the S3 object.
  *
  * If `metadataEquivalent` reports the two sides as equivalent, `s3Metadata` is returned
  * untouched. Otherwise a new `ObjectMetadata` is built with the same `key` and
  * `lastModified`, whose metadata map contains only: the merged identifier entries, any
  * Picdar identifier for this image, and the filename / uploaded-by / upload-time entries
  * (each only when a value is available).
  *
  * @param esMetadata the Elasticsearch view of the image (id, filename, uploader, upload time, identifiers)
  * @param s3Metadata the metadata currently on the S3 object
  * @param picdarData lookup from grid image id to Picdar identifier
  * @return the metadata that should be on the S3 object
  */
def mergeMetadata(esMetadata: EsDocumentWithMetadata, s3Metadata: ObjectMetadata, picdarData: PicdarData): ObjectMetadata = {
  /* If we can we should retain the legacy keys with _ in so that we don't have to touch the object */
  def bestKeyNameFor(dashVariant: String): String = {
    val underscoreVariant = dashVariant.replace("-", "_")
    val hasDashVariant = s3Metadata.metadata.contains(dashVariant)
    val hasUnderscoreVariant = s3Metadata.metadata.contains(underscoreVariant)
    if (hasDashVariant && hasUnderscoreVariant) {
      System.err.println(s"Warning: both dash and underscore keys on ${s3Metadata.key}")
    }
    // The underscore (legacy) key wins whenever it is present
    if (hasUnderscoreVariant) underscoreVariant else dashVariant
  }

  /*
   * Find the "best" value, trying hard to detect values that haven't really changed despite
   * encoding or format changes:
   *  - no ES value: keep whatever S3 has (possibly nothing)
   *  - ES and S3 agree once the S3 value is decoded: keep the S3 value exactly as stored
   *  - otherwise: use the encoded ES value
   */
  def bestValue(maybeEsValue: Option[String], maybeS3Value: Option[String], isDate: Boolean = false): Option[String] = {
    def isSame(esValue: String, s3Value: String): Boolean = {
      val decodedS3 = URI.decode(s3Value)
      if (isDate) {
        // Compare as parsed dates so that formatting differences alone don't count as a change.
        // NOTE(review): DateTime.parse will throw on a value it cannot parse - confirm the
        // caller is happy for that to propagate.
        val s3Date = DateTime.parse(decodedS3)
        val esDate = DateTime.parse(esValue)
        s3Date.equals(esDate)
      } else {
        decodedS3 == esValue
      }
    }

    (maybeEsValue, maybeS3Value) match {
      case (None, s3Value) => s3Value
      case (Some(esValue), Some(s3Value)) if isSame(esValue, s3Value) => Some(s3Value)
      case (Some(esValue), _) => Some(URI.encode(esValue))
    }
  }

  // Resolved up front (before the equivalence check) so the duplicate-key warnings are always emitted
  val filenameKey = bestKeyNameFor(ImageStorageProps.filenameMetadataKey)
  val uploadedByKey = bestKeyNameFor(ImageStorageProps.uploadedByMetadataKey)
  val uploadTimeKey = bestKeyNameFor(ImageStorageProps.uploadTimeMetadataKey)

  if (metadataEquivalent(esMetadata, s3Metadata)) {
    s3Metadata
  } else {
    // filename: taken from ES if it exists, then from S3, otherwise empty.
    // Any " (<image id>)" suffix is stripped. This must be a literal replacement:
    // String.replaceAll treats its first argument as a regex, where the parentheses form a
    // capture group and therefore never match the literal brackets.
    // bestValue hands back values in their stored form (ES values go through URI.encode), so
    // the encoded form of the suffix is stripped as well.
    val idSuffix = s" (${esMetadata.id})"
    val encodedIdSuffix = URI.encode(idSuffix)
    val fileName = bestValue(esMetadata.fileName, s3Metadata.metadata.get(filenameKey))
      .map(name => name.replace(idSuffix, "").replace(encodedIdSuffix, ""))

    // uploaded by: taken from ES if it exists, then from S3, otherwise empty
    val uploadedBy = bestValue(esMetadata.uploadedBy, s3Metadata.metadata.get(uploadedByKey))

    // uploaded time: taken from ES if it exists, then from S3, otherwise empty
    val uploadTime = bestValue(esMetadata.uploadTime, s3Metadata.metadata.get(uploadTimeKey), isDate = true)

    // Find ALL identifiers in elasticsearch (put the identifier prefix on the front and make lowercase)
    val esIdentifiers = esMetadata.identifiers.map { case (key, value) =>
      s"${ImageStorageProps.identifierMetadataKeyPrefix}$key".toLowerCase -> URI.encode(value)
    }

    // Find all OUR identifiers in S3 (must have the identifier prefix on the front)
    val s3Identifiers = s3Metadata.metadata.filter { case (key, _) =>
      key.startsWith(ImageStorageProps.identifierMetadataKeyPrefix)
    }

    // Picdar identifier for this image, if the lookup has one
    val picdarIdEntry = picdarData.gridToPicdar
      .get(esMetadata.id)
      .map(picdarId => s"${ImageStorageProps.identifierMetadataKeyPrefix}picdarurn" -> picdarId)

    // Merge the two maps together key by key; the picdar entry is added last so it takes precedence
    val allIdentifierKeys = s3Identifiers.keySet ++ esIdentifiers.keySet
    val mergedIdentifiers = allIdentifierKeys.flatMap { key =>
      bestValue(esIdentifiers.get(key), s3Identifiers.get(key)).map(value => key -> value)
    }.toMap
    val identifiers = mergedIdentifiers ++ picdarIdEntry

    ObjectMetadata(
      key = s3Metadata.key,
      lastModified = s3Metadata.lastModified,
      metadata = identifiers
        ++ fileName.map(fn => filenameKey -> fn)
        ++ uploadedBy.map(ub => uploadedByKey -> ub)
        ++ uploadTime.map(ut => uploadTimeKey -> ut)
    )
  }
}