usage/app/model/UsageGroup.scala (229 lines of code) (raw):

package model import play.api.libs.json._ import com.gu.contentapi.client.model.v1.{Content, Element, ElementType} import com.gu.contentatom.thrift.{Atom, AtomData} import com.gu.mediaservice.lib.logging.{GridLogging, LogMarker} import com.gu.mediaservice.model.usage.{DigitalUsageMetadata, MediaUsage, PublishedUsageStatus, UsageStatus} import lib.{ContentHelpers, MD5, MediaUsageBuilder, UsageConfig, UsageMetadataBuilder} import org.joda.time.DateTime import scala.collection.compat._ case class UsageGroup( usages: Set[MediaUsage], grouping: String, lastModified: DateTime, isReindex: Boolean = false, maybeStatus: Option[UsageStatus] = None ) class UsageGroupOps(config: UsageConfig, mediaWrapperOps: MediaWrapperOps) extends GridLogging { def buildId(contentWrapper: ContentWrapper) = contentWrapper.id def buildId(printUsage: PrintUsageRecord) = s"print/${MD5.hash(List( Some(printUsage.mediaId), Some(printUsage.printUsageMetadata.pageNumber), Some(printUsage.printUsageMetadata.sectionCode), Some(printUsage.printUsageMetadata.issueDate) ).flatten.map(_.toString).mkString("_"))}" def buildId(syndicationUsageRequest: SyndicationUsageRequest): String = s"syndication/${ MD5.hash(List( syndicationUsageRequest.metadata.partnerName, syndicationUsageRequest.metadata.syndicatedBy, syndicationUsageRequest.mediaId ).mkString("_")) }" def buildId(frontUsageRequest: FrontUsageRequest): String = s"front/${ MD5.hash(List( frontUsageRequest.mediaId, frontUsageRequest.metadata.front ).mkString("_")) }" def buildId(downloadUsageRequest: DownloadUsageRequest): String = s"download/${ MD5.hash(List( downloadUsageRequest.mediaId, downloadUsageRequest.metadata.downloadedBy, downloadUsageRequest.dateAdded.getMillis.toString ).mkString("_")) }" def build(content: Content, status: UsageStatus, lastModified: DateTime, isReindex: Boolean)(implicit logMarker: LogMarker) = ContentWrapper.build(content, status, lastModified).map(contentWrapper => { val usages = createUsages(contentWrapper, isReindex) logger.info(logMarker, s"Built UsageGroup: ${contentWrapper.id}") UsageGroup(usages.toSet, contentWrapper.id, lastModified, isReindex, maybeStatus = Some(status)) }) def build(printUsageRecords: List[PrintUsageRecord]) = printUsageRecords.map(printUsageRecord => { val usageId = UsageIdBuilder.build(printUsageRecord) UsageGroup( Set(MediaUsageBuilder.build(printUsageRecord, usageId, buildId(printUsageRecord))), usageId.toString, printUsageRecord.dateAdded ) }) def build(syndicationUsageRequest: SyndicationUsageRequest): UsageGroup = { val usageGroupId = buildId(syndicationUsageRequest) UsageGroup( Set(MediaUsageBuilder.build(syndicationUsageRequest, usageGroupId)), usageGroupId, syndicationUsageRequest.dateAdded ) } def build(frontUsageRequest: FrontUsageRequest): UsageGroup = { val usageGroupId = buildId(frontUsageRequest) UsageGroup( Set(MediaUsageBuilder.build(frontUsageRequest, usageGroupId)), usageGroupId, frontUsageRequest.dateAdded ) } def build(downloadUsageRequest: DownloadUsageRequest): UsageGroup = { val usageGroupId = buildId(downloadUsageRequest) UsageGroup( Set(MediaUsageBuilder.build(downloadUsageRequest, usageGroupId)), usageGroupId, downloadUsageRequest.dateAdded ) } def createUsages(contentWrapper: ContentWrapper, isReindex: Boolean)(implicit logMarker: LogMarker) = { // Generate unique UUID to track extract job val uuid = java.util.UUID.randomUUID.toString implicit val extractJobLogMarkers: LogMarker = logMarker ++ Map("extract-job-id" -> uuid) val content = contentWrapper.content val usageStatus = contentWrapper.status logger.info(extractJobLogMarkers, s"Extracting images from ${content.id}") val mediaAtomsUsages = extractMediaAtoms(content, usageStatus, isReindex)(extractJobLogMarkers).flatMap { atom => getImageId(atom) match { case Some(id) => val mediaWrapper = mediaWrapperOps.build(mediaId = id, contentWrapper = contentWrapper, usageGroupId = buildId(contentWrapper)) val usage = MediaUsageBuilder.build(mediaWrapper) Seq(createUsagesLogging(usage)(logMarker)) case None => Seq.empty } } val imageElementUsages = extractImageElements(content, usageStatus, isReindex)(extractJobLogMarkers).map { element => val mediaWrapper = mediaWrapperOps.build(mediaId = element.id, contentWrapper = contentWrapper, usageGroupId = buildId(contentWrapper)) val usage = MediaUsageBuilder.build(mediaWrapper) createUsagesLogging(usage)(logMarker) } val cartoonElementUsages = extractCartoonUniqueMediaIds(content).map { mediaId => val mediaWrapper = mediaWrapperOps.build(mediaId, contentWrapper = contentWrapper, usageGroupId = buildId(contentWrapper)) val usage = MediaUsageBuilder.build(mediaWrapper) createUsagesLogging(usage)(logMarker) } // TODO capture images from interactive embeds mediaAtomsUsages ++ imageElementUsages ++ cartoonElementUsages } private def createUsagesLogging(usage: MediaUsage)(implicit logMarker: LogMarker) = { logger.info(logMarker, s"Built MediaUsage for ${usage.mediaId}") usage.digitalUsageMetadata.foreach(meta => { logger.info(logMarker, s"Digital MediaUsage for ${usage.mediaId}: ${Json.toJson(meta)}") }) usage.printUsageMetadata.foreach(meta => { logger.info(logMarker, s"Print MediaUsage for ${usage.mediaId}: ${Json.toJson(meta)}") }) usage } private def isNewContent(content: Content, usageStatus: UsageStatus): Boolean = { val dateLimit = new DateTime(config.usageDateLimit) val contentFirstPublished = ContentHelpers.getContentFirstPublished(content) usageStatus match { case PublishedUsageStatus => contentFirstPublished.exists(_.isAfter(dateLimit)) case _ => true } } private def extractMediaAtoms(content: Content, usageStatus: UsageStatus, isReindex: Boolean)(implicit logMarker: LogMarker) = { val isNew = isNewContent(content, usageStatus) val shouldRecordUsages = isNew || isReindex if (shouldRecordUsages) { logger.info(logMarker, s"Passed shouldRecordUsages for media atom") val groupedMediaAtoms = groupMediaAtoms(content) if (groupedMediaAtoms.isEmpty) { logger.info(logMarker, s"No Matching media atoms found") } else { logger.info(logMarker, s"${groupedMediaAtoms.length} media atoms found") groupedMediaAtoms.foreach(atom => logger.info(logMarker, s"Matching media atom ${atom.id} found")) } groupedMediaAtoms } else { logger.info(logMarker, s"Failed shouldRecordUsages for media atoms: isNew-$isNew isReindex-$isReindex") Seq.empty } } private def groupMediaAtoms(content: Content) = { val mediaAtoms = content.atoms match { case Some(atoms) => atoms.media match { case Some(mediaAtoms) => filterOutAtomsWithNoImage(mediaAtoms.toSeq) case _ => Seq.empty } case _ => Seq.empty } mediaAtoms } private def filterOutAtomsWithNoImage(atoms: Seq[Atom]): Seq[Atom] = { for { atom <- atoms atomId = getImageId(atom) if atomId.isDefined } yield atom } private def getImageId(atom: Atom): Option[String] = { try { val posterImage = atom.data.asInstanceOf[AtomData.Media].media.posterImage posterImage match { case Some(image) => Some(image.mediaId.replace(s"${config.apiUri}/images/", "")) case _ => None } } catch { case e: ClassCastException => None } } private def extractCartoonUniqueMediaIds(content: Content): Set[String] = (for { elements <- content.elements.toSeq cartoonElement <- elements.filter(_.`type` == ElementType.Cartoon) asset <- cartoonElement.assets.toSeq data <- asset.typeData.toSeq cartoonVariants <- data.cartoonVariants.toSeq cartoonVariant <- cartoonVariants image <- cartoonVariant.images mediaId <- image.mediaId } yield mediaId).toSet private def extractImageElements( content: Content, usageStatus: UsageStatus, isReindex: Boolean )(implicit logMarker: LogMarker): Seq[Element] = { val isNew = isNewContent(content, usageStatus) val shouldRecordUsages = isNew || isReindex if (shouldRecordUsages) { logger.info(logMarker, s"Passed shouldRecordUsages") val groupedElements = groupImageElements(content) if (groupedElements.isEmpty) { logger.info(logMarker, s"No Matching elements found") } else { groupedElements.foreach(elements => { logger.info(logMarker, s"${elements.length} elements found") elements.foreach(element => logger.info(logMarker, s"Matching element ${element.id} found")) }) } groupedElements.getOrElse(Seq.empty) } else { logger.info(logMarker, s"Failed shouldRecordUsages: isNew-$isNew isReindex-$isReindex") Seq.empty } } private def groupImageElements(content: Content): Option[Seq[Element]] = { content.elements.map(elements => { elements.filter(_.`type` == ElementType.Image) .groupBy(_.id) .map(_._2.head).to(collection.immutable.Seq) }) } } case class MediaWrapper( mediaId: String, usageGroupId: String, contentStatus: UsageStatus, usageMetadata: DigitalUsageMetadata, lastModified: DateTime) class MediaWrapperOps(usageMetadataBuilder: UsageMetadataBuilder) { def build(mediaId: String, contentWrapper: ContentWrapper, usageGroupId: String): MediaWrapper = { val usageMetadata = usageMetadataBuilder.build(contentWrapper.content) MediaWrapper(mediaId, usageGroupId, contentWrapper.status, usageMetadata, contentWrapper.lastModified) } }