common/app/model/dotcomrendering/pageElements/PageElement.scala (1,757 lines of code) (raw):
package model.dotcomrendering.pageElements
import com.gu.contentapi.client.model.v1
import com.gu.contentapi.client.model.v1.ElementType.{List => GuList, Map => GuMap, _}
import com.gu.contentapi.client.model.v1.EmbedTracksType.DoesNotTrack
import com.gu.contentapi.client.model.v1.{
EmbedTracking,
SponsorshipType,
TimelineElementFields,
WitnessElementFields,
BlockElement => ApiBlockElement,
Sponsorship => ApiSponsorship,
}
import common.{Chronos, Edition}
import conf.Configuration
import layout.ContentWidths.{BodyMedia, ImmersiveMedia, MainMedia}
import model.content._
import model.dotcomrendering.InteractiveSwitchOver
import model.dotcomrendering.pageElements.CartoonExtraction._
import model.{AudioAsset, ImageAsset, ImageElement, ImageMedia, VideoAsset}
import org.joda.time.DateTime
import org.jsoup.Jsoup
import play.api.libs.json._
import views.support.cleaner.SoundcloudHelper
import views.support.{ImgSrc, SrcSet, Video700}
import java.net.URLEncoder
import scala.jdk.CollectionConverters._
import scala.util.Try
// ------------------------------------------------------
// PageElement Supporting Types and Traits
// ------------------------------------------------------
// TODO dates are being rendered as strings to avoid duplication of the
// to-string logic, but ultimately we should pass unformatted date info to
// DCR.
/** One event of a timeline atom, in the shape serialised for DCR.
  *
  * `date` and `toDate` are strings while `unixDate` and `toUnixDate` are numeric; see the TODO above
  * about dates currently being passed as pre-rendered strings. The companion supplies the
  * macro-derived play-json `Writes`.
  */
case class TimelineAtomEvent(
title: String,
date: String,
body: Option[String],
toDate: Option[String],
unixDate: Long,
toUnixDate: Option[Long],
)
object TimelineAtomEvent {
implicit val timelineAtomEventWrites: Writes[TimelineAtomEvent] = Json.writes[TimelineAtomEvent]
}
/** Sponsorship details (name, logo, link and type) that can be attached to a page element. */
case class Sponsorship(
    sponsorName: String,
    sponsorLogo: String,
    sponsorLink: String,
    sponsorshipType: SponsorshipType,
)

object Sponsorship {

  /** Builds the DCR model from a Content API sponsorship by copying its four fields across unchanged. */
  def apply(sponsorship: ApiSponsorship): Sponsorship =
    new Sponsorship(
      sponsorName = sponsorship.sponsorName,
      sponsorLogo = sponsorship.sponsorLogo,
      sponsorLink = sponsorship.sponsorLink,
      sponsorshipType = sponsorship.sponsorshipType,
    )

  /** Hand-written serialiser: `sponsorshipType` is emitted as its `name` string rather than as an object. */
  implicit val SponsorshipWrites: Writes[Sponsorship] = Writes[Sponsorship] { value =>
    Json.obj(
      "sponsorName" -> value.sponsorName,
      "sponsorLogo" -> value.sponsorLogo,
      "sponsorLink" -> value.sponsorLink,
      "sponsorshipType" -> value.sponsorshipType.name,
    )
  }
}
/** An image URL paired with its width. The companion supplies the macro-derived play-json `Writes`. */
case class NSImage1(url: String, width: Long)

object NSImage1 {
  implicit val NSImage1Writes: Writes[NSImage1] = Json.writes[NSImage1]

  /** Converts the crops of `image` into `NSImage1` values.
    *
    * A crop is skipped when it has no `url`, or when its `"width"` field is missing or cannot be
    * converted to a `Long`. Previously such a width threw and aborted the whole conversion.
    *
    * @param image the image media whose `imageCrops` are converted
    * @return one `NSImage1` per crop that has both a url and a usable width, in crop order
    */
  def imageMediaToSequence(image: ImageMedia): Seq[NSImage1] =
    for {
      crop <- image.imageCrops
      url <- crop.url
      // Try guards both the partial map lookup and the numeric conversion.
      width <- Try(crop.fields("width").toLong).toOption
    } yield NSImage1(url, width)
}
/** Members shared by the page elements in this file that mix this trait in (embeds, tweets, videos, ...).
  *
  * NOTE(review): the per-member notes below describe how `PageElement.make` fills them for tweets,
  * the only construction site visible here; confirm the same holds for the other element types.
  */
trait ThirdPartyEmbeddedContent {
// For tweets this is `containsThirdPartyTracking(element.tracking)`.
def isThirdPartyTracking: Boolean
// For tweets this is the `source` of the element's type data.
def source: Option[String]
// For tweets this is the `sourceDomain` of the element's type data.
def sourceDomain: Option[String]
}
// ------------------------------------------------------
// PageElement
// ------------------------------------------------------
/*
These elements are used for Dotcom Rendering (DCR). They are essentially the new version of the
model.liveblog._ elements, replaced in full here.
The trait is sealed, so every subtype has to be declared in this file.
 */
sealed trait PageElement
// Note:
// In the file PageElement-Identifiers.md you will find a discussion of identifiers used by PageElements
// Also look for "03feb394-a17d-4430-8384-edd1891e0d01"
/** Page element for an audio atom: its `id`, display text (`kicker`, optional `title`), cover and
  * track URLs, a `duration` and a `contentId`. The companion supplies the macro-derived play-json `Writes`.
  */
case class AudioAtomBlockElement(
id: String,
kicker: String,
title: Option[String],
coverUrl: String,
trackUrl: String,
duration: Int,
contentId: String,
) extends PageElement
object AudioAtomBlockElement {
implicit val AudioAtomBlockElementWrites: Writes[AudioAtomBlockElement] = Json.writes[AudioAtomBlockElement]
}
// We are currently using AudioBlockElement as a catch all for audio errors, skipping the first definition
// See comment: 2e5ac4fd-e7f1-4c04-bdcd-ceadd2dc5d4c
/** Audio element holding its audio assets and an optional `id` (defaults to `None`).
  * The companion supplies the macro-derived play-json `Writes`.
  */
case class AudioBlockElement(assets: Seq[AudioAsset], id: Option[String] = None) extends PageElement
object AudioBlockElement {
implicit val AudioBlockElementWrites: Writes[AudioBlockElement] = Json.writes[AudioBlockElement]
}
/** Blockquote HTML. `PageElement.make` emits this for text fragments that `TextCleaner.split`
  * tags as "blockquote". The companion supplies the macro-derived play-json `Writes`.
  */
case class BlockquoteBlockElement(html: String) extends PageElement
object BlockquoteBlockElement {
implicit val BlockquoteBlockElementWrites: Writes[BlockquoteBlockElement] = Json.writes[BlockquoteBlockElement]
}
/** First version of the callout element: form identification (`id`, `formId`, `tagName`), display text
  * (`title`, `description`), an `activeFrom` value, a `displayOnSensitive` flag and the list of form fields.
  * See `CalloutBlockElementV2` for the extended variant. The companion supplies the macro-derived
  * play-json `Writes`.
  */
case class CalloutBlockElement(
id: String,
calloutsUrl: Option[String],
activeFrom: Long,
displayOnSensitive: Boolean,
formId: Int,
title: String,
description: String,
tagName: String,
formFields: List[CalloutFormField],
) extends PageElement
object CalloutBlockElement {
implicit val CalloutBlockElementWrites: Writes[CalloutBlockElement] = Json.writes[CalloutBlockElement]
}
/** Second version of the callout element. Compared with `CalloutBlockElement` it additionally carries
  * `activeUntil`, `prompt`, `isNonCollapsible` and optional `contacts`. The companion supplies the
  * macro-derived play-json `Writes`.
  */
case class CalloutBlockElementV2(
id: String,
calloutsUrl: Option[String],
activeFrom: Long,
activeUntil: Option[Long],
displayOnSensitive: Boolean,
formId: Int,
prompt: String,
title: String,
description: String,
tagName: String,
formFields: List[CalloutFormField],
isNonCollapsible: Boolean,
contacts: Option[Seq[Contact]],
) extends PageElement
object CalloutBlockElementV2 {
implicit val CalloutBlockElementV2Writes: Writes[CalloutBlockElementV2] = Json.writes[CalloutBlockElementV2]
}
/** The images of a cartoon for one `viewportSize`. */
case class DcrCartoonVariant(
viewportSize: String,
images: List[ImageAsset],
)
/** Cartoon element: one `DcrCartoonVariant` per viewport size plus role, credit, caption and alt text.
  * `PageElement.make` builds it through `extractCartoon` from the element's cartoon type data.
  */
case class CartoonBlockElement(
variants: List[DcrCartoonVariant],
role: Role,
credit: Option[String],
caption: Option[String],
alt: Option[String],
displayCredit: Option[Boolean],
) extends PageElement
object CartoonBlockElement {
// The variant Writes is declared first: the element's macro-derived Writes needs it for `variants`.
implicit val CartoonVariantWrites: Writes[DcrCartoonVariant] = Json.writes[DcrCartoonVariant]
implicit val CartoonBlockElementWrites: Writes[CartoonBlockElement] = Json.writes[CartoonBlockElement]
}
// The extension of ChartAtomBlockElement is experimental. Three fields have been added
// (html: String, css: Option[String], js: Option[String]), but it looks like the html string we get from CAPI
// already contains all the css and js required to display the atom.
// Note that the CAPI answer also gives structured data, so maybe one day we could try to use that instead of
// precompiled html.
/** Chart atom element: the atom `id`, its `url` and `title`, and the markup (`html`, optional `css`
  * and `js`). The companion supplies the macro-derived play-json `Writes`.
  */
case class ChartAtomBlockElement(
id: String,
url: String,
html: String,
css: Option[String],
js: Option[String],
title: String,
) extends PageElement
object ChartAtomBlockElement {
implicit val ChartAtomBlockElementWrites: Writes[ChartAtomBlockElement] = Json.writes[ChartAtomBlockElement]
}
/** Code snippet element: the `html`, the `language` it is written in and an `isMandatory` flag.
  * The companion supplies the macro-derived play-json `Writes`.
  */
case class CodeBlockElement(html: String, language: String, isMandatory: Boolean) extends PageElement
object CodeBlockElement {
implicit val CodeBlockElementWrites: Writes[CodeBlockElement] = Json.writes[CodeBlockElement]
}
/** Embedded reader comment: its `body`, the author's avatar, profile URL and name, a `permalink`
  * and a `dateTime` string. The companion supplies the macro-derived play-json `Writes`.
  */
case class CommentBlockElement(
body: String,
avatarURL: String,
profileURL: String,
profileName: String,
permalink: String,
dateTime: String,
) extends PageElement
object CommentBlockElement {
implicit val CommentBlockElementWrites: Writes[CommentBlockElement] = Json.writes[CommentBlockElement]
}
/** Content atom element carrying only the `atomId`. The companion supplies the macro-derived play-json `Writes`. */
case class ContentAtomBlockElement(atomId: String) extends PageElement
object ContentAtomBlockElement {
implicit val ContentAtomBlockElementWrites: Writes[ContentAtomBlockElement] = Json.writes[ContentAtomBlockElement]
}
/** Embedded document: optional `embedUrl`, dimensions, `title` and `isMandatory` flag, plus the
  * `ThirdPartyEmbeddedContent` members. The companion supplies the macro-derived play-json `Writes`.
  */
case class DocumentBlockElement(
embedUrl: Option[String],
height: Option[Int],
width: Option[Int],
title: Option[String],
isMandatory: Option[Boolean],
isThirdPartyTracking: Boolean,
source: Option[String],
sourceDomain: Option[String],
) extends PageElement
with ThirdPartyEmbeddedContent
object DocumentBlockElement {
implicit val DocumentBlockElementWrites: Writes[DocumentBlockElement] = Json.writes[DocumentBlockElement]
}
/** Generic embed: the embed `html` with optional `safe`, `alt`, `role` and `caption`, an `isMandatory`
  * flag, plus the `ThirdPartyEmbeddedContent` members. The companion supplies the macro-derived
  * play-json `Writes`.
  */
case class EmbedBlockElement(
html: String,
safe: Option[Boolean],
alt: Option[String],
isMandatory: Boolean,
role: Option[String],
isThirdPartyTracking: Boolean,
source: Option[String],
sourceDomain: Option[String],
caption: Option[String],
) extends PageElement
with ThirdPartyEmbeddedContent
object EmbedBlockElement {
implicit val EmbedBlockElementWrites: Writes[EmbedBlockElement] = Json.writes[EmbedBlockElement]
}
/** Explainer atom element: atom `id`, `title` and `body`. The companion supplies the macro-derived play-json `Writes`. */
case class ExplainerAtomBlockElement(id: String, title: String, body: String) extends PageElement
object ExplainerAtomBlockElement {
implicit val ExplainerAtomBlockElementWrites: Writes[ExplainerAtomBlockElement] =
Json.writes[ExplainerAtomBlockElement]
}
/** Form element carrying optional `html`. It is not listed in `PageElement.isSupported`, which
  * therefore returns false for it. The companion supplies the macro-derived play-json `Writes`.
  */
case class FormBlockElement(html: Option[String]) extends PageElement
object FormBlockElement {
implicit val FormBlockElementWrites: Writes[FormBlockElement] = Json.writes[FormBlockElement]
}
/** Catch-all atom element: atom `id`, `url` and optional `html`, `css` and `js`.
  * The companion supplies the macro-derived play-json `Writes`.
  */
case class GenericAtomBlockElement(
id: String,
url: String,
html: Option[String],
css: Option[String],
js: Option[String],
) extends PageElement
// GenericAtomBlockElement is the only BlockElement that, despite following the Atom BlockElement naming
// convention, doesn't correspond to a single atom type.
// We use it to carry to DCR atoms that do not (yet) have their own dedicated BlockElement and are rendered in DCR as iframes.
// - {url} for src
// - {html, css, js} for srcdoc
object GenericAtomBlockElement {
implicit val GenericAtomBlockElementWrites: Writes[GenericAtomBlockElement] = Json.writes[GenericAtomBlockElement]
}
/** Guide atom element: atom `id`, `label`, `title`, optional `img`, the `html` body and a `credit`.
  * The companion supplies the macro-derived play-json `Writes`.
  */
case class GuideAtomBlockElement(
id: String,
label: String,
title: String,
img: Option[String],
html: String,
credit: String,
) extends PageElement
object GuideAtomBlockElement {
implicit val GuideAtomBlockElementWrites: Writes[GuideAtomBlockElement] = Json.writes[GuideAtomBlockElement]
}
/** Guardian-hosted video element: its video `assets` and `imageMedia`, `caption`, `url` and `originalUrl`,
  * `html`, `source` and `role`. The companion supplies the macro-derived play-json `Writes`.
  */
case class GuVideoBlockElement(
assets: Seq[VideoAsset],
imageMedia: ImageMedia,
caption: String,
url: String,
originalUrl: String,
html: String,
source: String,
role: Role,
) extends PageElement
object GuVideoBlockElement {
implicit val GuVideoBlockElementWrites: Writes[GuVideoBlockElement] = Json.writes[GuVideoBlockElement]
}
/** The `srcSet` entries for one `weighting`. `PageElement.make` builds one per entry of the width
  * table chosen for the image (main media, immersive or body). The companion supplies the
  * macro-derived play-json `Writes`.
  */
case class ImageSource(weighting: String, srcSet: Seq[SrcSet])
object ImageSource {
implicit val ImageSourceWrites: Writes[ImageSource] = Json.writes[ImageSource]
}
/** Image element. In `PageElement.make`, `media` wraps the element's image assets, `data` comes from
  * `imageDataFor(element)`, `displayCredit` and `role` come from the image type data (the role defaults
  * to Immersive for the main block of an immersive page, Inline otherwise) and `imageSources` holds the
  * generated srcsets. The companion supplies the macro-derived play-json `Writes`.
  */
case class ImageBlockElement(
media: ImageMedia,
data: Map[String, String],
displayCredit: Option[Boolean],
role: Role,
imageSources: Seq[ImageSource],
) extends PageElement
object ImageBlockElement {
implicit val ImageBlockElementWrites: Writes[ImageBlockElement] = Json.writes[ImageBlockElement]
}
/** Interactive atom element: atom `id`, `url` and `title`, optional `html`/`css`/`js`, plus an
  * optional `placeholderUrl` and `role`. The companion supplies the macro-derived play-json `Writes`.
  */
case class InteractiveAtomBlockElement(
id: String,
url: String,
html: Option[String],
css: Option[String],
js: Option[String],
placeholderUrl: Option[String],
role: Option[String],
title: String,
) extends PageElement
object InteractiveAtomBlockElement {
implicit val InteractiveAtomBlockElementWrites: Writes[InteractiveAtomBlockElement] =
Json.writes[InteractiveAtomBlockElement]
}
/** Interactive (non-atom) element; every field is optional: `url`, `alt`, `scriptUrl`, `role`,
  * `isMandatory` and `caption`. The companion supplies the macro-derived play-json `Writes`.
  */
case class InteractiveBlockElement(
url: Option[String],
alt: Option[String],
scriptUrl: Option[String],
role: Option[String],
isMandatory: Option[Boolean],
caption: Option[String],
) extends PageElement
object InteractiveBlockElement {
implicit val InteractiveBlockElementWrites: Writes[InteractiveBlockElement] = Json.writes[InteractiveBlockElement]
}
/** Instagram embed: the post `url`, optional `html` and a `hasCaption` flag, plus the
  * `ThirdPartyEmbeddedContent` members. The companion supplies the macro-derived play-json `Writes`.
  */
case class InstagramBlockElement(
url: String,
html: Option[String],
hasCaption: Boolean,
isThirdPartyTracking: Boolean,
source: Option[String],
sourceDomain: Option[String],
) extends PageElement
with ThirdPartyEmbeddedContent
object InstagramBlockElement {
implicit val InstagramBlockElementWrites: Writes[InstagramBlockElement] = Json.writes[InstagramBlockElement]
}
/** One item of a `ListBlockElement`: nested page `elements` plus optional title, bio, end note and
  * contributor/byline details.
  *
  * It extends `PageElement` itself but is not listed in `PageElement.isSupported`, which therefore
  * returns false for a bare `ListItem`. The macro-derived `Writes` below needs a `Writes[PageElement]`
  * for `elements`; that instance is not defined in the visible part of this file.
  */
case class ListItem(
elements: Seq[PageElement],
title: Option[String],
bio: Option[String],
endNote: Option[String],
contributorIds: Option[scala.collection.Seq[String]],
byline: Option[String],
bylineHtml: Option[String],
contributorImageOverrideUrl: Option[String],
) extends PageElement
object ListItem {
implicit val listItemWrites: Writes[ListItem] = Json.writes[ListItem]
}
/** List element: its `items` and an optional `listElementType`. The companion supplies the
  * macro-derived play-json `Writes`.
  */
case class ListBlockElement(
items: Seq[ListItem],
listElementType: Option[String],
) extends PageElement
object ListBlockElement {
implicit val listBlockElementWrites: Writes[ListBlockElement] = Json.writes[ListBlockElement]
}
/** One event inside a `TimelineSection`: optional title, date and label, an optional `main` element
  * and the `body` elements. It is not a `PageElement` itself. The companion supplies the macro-derived
  * play-json `Writes`.
  */
case class TimelineEvent(
title: Option[String],
date: Option[String],
label: Option[String],
main: Option[PageElement],
body: Seq[PageElement],
)
object TimelineEvent {
implicit val timelineEvent: Writes[TimelineEvent] = Json.writes[TimelineEvent]
}
/** A titled group of `TimelineEvent`s inside a `TimelineBlockElement`. The companion supplies the
  * macro-derived play-json `Writes`.
  */
case class TimelineSection(
title: Option[String],
events: Seq[TimelineEvent],
)
object TimelineSection {
implicit val timelineSection: Writes[TimelineSection] = Json.writes[TimelineSection]
}
/** Timeline element made of `sections`. This is distinct from `TimelineAtomBlockElement`, which carries
  * a timeline atom. The companion supplies the macro-derived play-json `Writes`.
  */
case class TimelineBlockElement(
sections: Seq[TimelineSection],
) extends PageElement
object TimelineBlockElement {
implicit val timelineBlockElement: Writes[TimelineBlockElement] = Json.writes[TimelineBlockElement]
}
/** Embedded map: `embedUrl` and `originalUrl`, `caption`, `title` and dimensions, plus the
  * `ThirdPartyEmbeddedContent` members. The companion supplies the macro-derived play-json `Writes`.
  */
case class MapBlockElement(
embedUrl: String,
originalUrl: String,
source: Option[String],
caption: String,
title: String,
width: Int,
height: Int,
isThirdPartyTracking: Boolean,
sourceDomain: Option[String],
) extends PageElement
with ThirdPartyEmbeddedContent
object MapBlockElement {
implicit val MapBlockElementWrites: Writes[MapBlockElement] = Json.writes[MapBlockElement]
}
/** A playable asset of a media atom: its `url` and optional `mimeType`. */
case class MediaAtomBlockElementMediaAsset(
    url: String,
    mimeType: Option[String],
)

object MediaAtomBlockElementMediaAsset {
  implicit val MediaAtomBlockElementMediaAssetWrites: Writes[MediaAtomBlockElementMediaAsset] =
    Json.writes[MediaAtomBlockElementMediaAsset]

  /** Converts a `MediaAsset` by using its `id` as the `url` and copying its `mimeType`.
    * NOTE(review): this presumes `asset.id` holds a URL for the assets passed here; confirm against callers.
    */
  def fromMediaAsset(asset: MediaAsset): MediaAtomBlockElementMediaAsset =
    MediaAtomBlockElementMediaAsset(url = asset.id, mimeType = asset.mimeType)
}
/** Media atom element: atom `id`, `title` and `defaultHtml`, its `assets`, and optional duration,
  * poster images, expiry flag, active version and channel id. The companion supplies the macro-derived
  * play-json `Writes`.
  */
case class MediaAtomBlockElement(
id: String,
title: String,
defaultHtml: String,
assets: Seq[MediaAtomBlockElementMediaAsset],
duration: Option[Long],
posterImage: Option[Seq[NSImage1]],
expired: Option[Boolean],
activeVersion: Option[Long],
channelId: Option[String],
) extends PageElement
object MediaAtomBlockElement {
implicit val MediaAtomBlockElementWrites: Writes[MediaAtomBlockElement] = Json.writes[MediaAtomBlockElement]
}
/** Membership element; every field is optional. It is not listed in `PageElement.isSupported`, which
  * therefore returns false for it. The companion supplies the macro-derived play-json `Writes`.
  */
case class MembershipBlockElement(
originalUrl: Option[String],
linkText: Option[String],
linkPrefix: Option[String],
title: Option[String],
venue: Option[String],
location: Option[String],
identifier: Option[String],
image: Option[String],
price: Option[String],
) extends PageElement
object MembershipBlockElement {
implicit val MembershipBlockElementWrites: Writes[MembershipBlockElement] = Json.writes[MembershipBlockElement]
}
/** One item of a `ProfileAtomBlockElement`: an optional `title` and a `body`. */
case class ProfileAtomBlockElementItem(title: Option[String], body: String)
object ProfileAtomBlockElementItem {
// NOTE(review): the val is named after GuideAtom although it serialises ProfileAtomBlockElementItem;
// it looks like a copy-paste leftover. Left unchanged here because it is a public name.
implicit val GuideAtomBlockElementItemWrites: Writes[ProfileAtomBlockElementItem] =
Json.writes[ProfileAtomBlockElementItem]
}
/** Profile atom element: atom `id`, `label`, `title`, optional `img`, the `html`, its `items` and a
  * `credit`. The companion supplies the macro-derived play-json `Writes`.
  */
case class ProfileAtomBlockElement(
id: String,
label: String,
title: String,
img: Option[String],
html: String,
items: List[ProfileAtomBlockElementItem],
credit: String,
) extends PageElement
object ProfileAtomBlockElement {
implicit val ProfileAtomBlockElementWrites: Writes[ProfileAtomBlockElement] = Json.writes[ProfileAtomBlockElement]
}
/** Pullquote: optional `html` and `attribution`, a `role`, plus the `ThirdPartyEmbeddedContent`
  * members. The companion supplies the macro-derived play-json `Writes`.
  */
case class PullquoteBlockElement(
html: Option[String],
role: Role,
attribution: Option[String],
isThirdPartyTracking: Boolean,
source: Option[String],
sourceDomain: Option[String],
) extends PageElement
with ThirdPartyEmbeddedContent
object PullquoteBlockElement {
implicit val PullquoteBlockElementWrites: Writes[PullquoteBlockElement] = Json.writes[PullquoteBlockElement]
}
/** Q&A element: `id`, `title`, optional `img`, the `html` and a `credit`. The companion supplies the
  * macro-derived play-json `Writes`.
  */
case class QABlockElement(id: String, title: String, img: Option[String], html: String, credit: String)
extends PageElement
object QABlockElement {
implicit val QABlockElementWrites: Writes[QABlockElement] = Json.writes[QABlockElement]
}
/** One possible answer to a `QuizAtomQuestion`. */
case class QuizAtomAnswer(
id: String,
text: String,
revealText: Option[String],
answerBuckets: Seq[String],
isCorrect: Boolean,
)
/** A result bucket of a quiz: `id`, `title` and `description`. */
case class QuizAtomResultBucket(id: String, title: String, description: String)
/** A quiz question with its answers and an optional image. */
case class QuizAtomQuestion(
id: String,
text: String,
answers: Seq[QuizAtomAnswer],
imageUrl: Option[String],
imageAlt: Option[String],
)
/** A result group of a quiz: `id`, `title`, `shareText` and the `minScore` attached to it. */
case class QuizAtomResultGroup(id: String, title: String, shareText: String, minScore: Int)
/** Quiz atom element: atom `id`, `quizType`, the questions and the result buckets and groups. */
case class QuizAtomBlockElement(
id: String,
quizType: String,
questions: Seq[QuizAtomQuestion],
resultBuckets: Seq[QuizAtomResultBucket],
resultGroups: Seq[QuizAtomResultGroup],
) extends PageElement
object QuizAtomBlockElement {
// All quiz Writes live in this companion. The Writes for the nested types are declared before the
// ones that need them: the answer before the question, and everything before the element.
implicit val QuizAtomAnswerWrites: Writes[QuizAtomAnswer] = Json.writes[QuizAtomAnswer]
implicit val QuizAtomQuestionWrites: Writes[QuizAtomQuestion] = Json.writes[QuizAtomQuestion]
implicit val QuizAtomResultBucketWrites: Writes[QuizAtomResultBucket] = Json.writes[QuizAtomResultBucket]
implicit val QuizAtomResultGroupWrites: Writes[QuizAtomResultGroup] = Json.writes[QuizAtomResultGroup]
implicit val QuizAtomBlockElementWrites: Writes[QuizAtomBlockElement] = Json.writes[QuizAtomBlockElement]
}
/** Rich link. In `PageElement.make`, `url`, `text`, `prefix`, `role` and `sponsorship` come from the
  * element's rich link type data (`originalUrl`, `linkText`, `linkPrefix`, `role`, `sponsorship`).
  * The companion supplies the macro-derived play-json `Writes`.
  */
case class RichLinkBlockElement(
url: Option[String],
text: Option[String],
prefix: Option[String],
role: Role,
sponsorship: Option[Sponsorship],
) extends PageElement
object RichLinkBlockElement {
implicit val RichLinkBlockElementWrites: Writes[RichLinkBlockElement] = Json.writes[RichLinkBlockElement]
}
/** Soundcloud embed: the embed `html`, an `id`, `isTrack` and `isMandatory` flags, plus the
  * `ThirdPartyEmbeddedContent` members. The companion supplies the macro-derived play-json `Writes`.
  */
case class SoundcloudBlockElement(
html: String,
id: String,
isTrack: Boolean,
isMandatory: Boolean,
isThirdPartyTracking: Boolean,
source: Option[String],
sourceDomain: Option[String],
) extends PageElement
with ThirdPartyEmbeddedContent
object SoundcloudBlockElement {
implicit val SoundCloudBlockElementWrites: Writes[SoundcloudBlockElement] = Json.writes[SoundcloudBlockElement]
}
/** Spotify embed: optional `embedUrl`, dimensions, `title` and `caption`, a `role`, plus the
  * `ThirdPartyEmbeddedContent` members. The companion supplies the macro-derived play-json `Writes`.
  */
case class SpotifyBlockElement(
embedUrl: Option[String],
height: Option[Int],
width: Option[Int],
title: Option[String],
caption: Option[String],
isThirdPartyTracking: Boolean,
source: Option[String],
sourceDomain: Option[String],
role: Role,
) extends PageElement
with ThirdPartyEmbeddedContent
object SpotifyBlockElement {
implicit val SpotifyBlockElementWrites: Writes[SpotifyBlockElement] = Json.writes[SpotifyBlockElement]
}
/** Subheading HTML. `PageElement.make` emits this for text fragments that `TextCleaner.split`
  * tags as "h2". The companion supplies the macro-derived play-json `Writes`.
  */
case class SubheadingBlockElement(html: String) extends PageElement
object SubheadingBlockElement {
implicit val SubheadingBlockElementWrites: Writes[SubheadingBlockElement] = Json.writes[SubheadingBlockElement]
}
/** Table element: optional `html`, a `role` and an optional `isMandatory` flag.
  * `PageElement.isSupported` returns true for it only when `isMandatory` is `Some(true)`.
  * The companion supplies the macro-derived play-json `Writes`.
  */
case class TableBlockElement(html: Option[String], role: Role, isMandatory: Option[Boolean]) extends PageElement
object TableBlockElement {
implicit val TableBlockElementWrites: Writes[TableBlockElement] = Json.writes[TableBlockElement]
}
/** Plain text HTML. `PageElement.make` emits this for every text fragment that is neither an "h2"
  * nor a "blockquote". The companion supplies the macro-derived play-json `Writes`.
  */
case class TextBlockElement(html: String) extends PageElement
object TextBlockElement {
implicit val TextBlockElementWrites: Writes[TextBlockElement] = Json.writes[TextBlockElement]
}
/** Timeline atom element: atom `id`, `title`, optional `description` and its `events`. This is distinct
  * from `TimelineBlockElement`, which is built from sections. The companion supplies the macro-derived
  * play-json `Writes`.
  */
case class TimelineAtomBlockElement(
id: String,
title: String,
description: Option[String],
events: Seq[TimelineAtomEvent],
) extends PageElement
object TimelineAtomBlockElement {
implicit val timelineAtomBlockElementWrites: Writes[TimelineAtomBlockElement] = Json.writes[TimelineAtomBlockElement]
}
/** Embedded tweet. `PageElement.make` only creates one when the tweet type data has an `id`, `html`
  * and `originalUrl`; `hasMedia` is set to `element.assets.nonEmpty` and `isThirdPartyTracking` to
  * `containsThirdPartyTracking(element.tracking)`. The companion supplies the macro-derived play-json `Writes`.
  */
case class TweetBlockElement(
html: String,
url: String,
id: String,
hasMedia: Boolean,
role: Role,
isThirdPartyTracking: Boolean,
source: Option[String],
sourceDomain: Option[String],
) extends PageElement
with ThirdPartyEmbeddedContent
object TweetBlockElement {
implicit val TweetBlockElementWrites: Writes[TweetBlockElement] = Json.writes[TweetBlockElement]
}
/** Element of unknown type carrying optional `html`. It is not listed in `PageElement.isSupported`,
  * which therefore returns false for it. The companion supplies the macro-derived play-json `Writes`.
  */
case class UnknownBlockElement(html: Option[String]) extends PageElement
object UnknownBlockElement {
implicit val UnknownBlockElementWrites: Writes[UnknownBlockElement] = Json.writes[UnknownBlockElement]
}
/** Generic embedded video: optional `caption` and `title`, `url` and `originalUrl`, dimensions and a
  * `role`, plus the `ThirdPartyEmbeddedContent` members. Unlike the Facebook, Vimeo and YouTube
  * variants it has no `embedUrl`. The companion supplies the macro-derived play-json `Writes`.
  */
case class VideoBlockElement(
caption: Option[String],
title: Option[String],
url: String,
originalUrl: String,
height: Int,
width: Int,
role: Role,
isThirdPartyTracking: Boolean,
source: Option[String],
sourceDomain: Option[String],
) extends PageElement
with ThirdPartyEmbeddedContent
object VideoBlockElement {
implicit val VideoBlockElementWrites: Writes[VideoBlockElement] = Json.writes[VideoBlockElement]
}
/** Embedded Facebook video: the `VideoBlockElement` fields plus an optional `embedUrl`.
  * The companion supplies the macro-derived play-json `Writes`.
  */
case class VideoFacebookBlockElement(
caption: Option[String],
title: Option[String],
url: String,
originalUrl: String,
embedUrl: Option[String],
height: Int,
width: Int,
role: Role,
isThirdPartyTracking: Boolean,
source: Option[String],
sourceDomain: Option[String],
) extends PageElement
with ThirdPartyEmbeddedContent
object VideoFacebookBlockElement {
implicit val VideoFacebookBlockElementWrites: Writes[VideoFacebookBlockElement] =
Json.writes[VideoFacebookBlockElement]
}
/** Embedded Vimeo video: the `VideoBlockElement` fields plus an optional `embedUrl`.
  * The companion supplies the macro-derived play-json `Writes`.
  */
case class VideoVimeoBlockElement(
caption: Option[String],
title: Option[String],
url: String,
originalUrl: String,
embedUrl: Option[String],
height: Int,
width: Int,
role: Role,
isThirdPartyTracking: Boolean,
source: Option[String],
sourceDomain: Option[String],
) extends PageElement
with ThirdPartyEmbeddedContent
object VideoVimeoBlockElement {
implicit val VideoVimeoElementWrites: Writes[VideoVimeoBlockElement] = Json.writes[VideoVimeoBlockElement]
}
/** Embedded YouTube video: the `VideoBlockElement` fields plus an optional `embedUrl`. This is distinct
  * from `YoutubeBlockElement`, which carries ids and poster images instead of URLs.
  * The companion supplies the macro-derived play-json `Writes`.
  */
case class VideoYoutubeBlockElement(
caption: Option[String],
title: Option[String],
url: String,
originalUrl: String,
embedUrl: Option[String],
height: Int,
width: Int,
role: Role,
isThirdPartyTracking: Boolean,
source: Option[String],
sourceDomain: Option[String],
) extends PageElement
with ThirdPartyEmbeddedContent
object VideoYoutubeBlockElement {
implicit val VideoYoutubeBlockElementWrites: Writes[VideoYoutubeBlockElement] = Json.writes[VideoYoutubeBlockElement]
}
/** Embedded Vine: `url` and `originalUrl`, dimensions, `title` and an optional `role`, plus the
  * `ThirdPartyEmbeddedContent` members.
  */
case class VineBlockElement(
url: String,
height: Int,
width: Int,
originalUrl: String,
title: String,
isThirdPartyTracking: Boolean,
source: Option[String],
sourceDomain: Option[String],
role: Option[String],
) extends PageElement
with ThirdPartyEmbeddedContent
object VineBlockElement {
// NOTE(review): the val is named after VideoYoutube although it serialises VineBlockElement; it looks
// like a copy-paste leftover. Left unchanged here because it is a public name.
implicit val VideoYoutubeBlockElementWrites: Writes[VineBlockElement] = Json.writes[VineBlockElement]
}
/** Type data of a witness asset; carries only an optional `name`. The companion supplies the
  * macro-derived play-json `Writes`.
  */
case class WitnessBlockElementAssetsElementTypeData(name: Option[String])
object WitnessBlockElementAssetsElementTypeData {
implicit val w1Writes: Writes[WitnessBlockElementAssetsElementTypeData] =
Json.writes[WitnessBlockElementAssetsElementTypeData]
}
/** One asset of a `WitnessBlockElement`: its `type`, optional `mimeType` and `file`, and optional
  * type data. The companion supplies the macro-derived play-json `Writes`.
  */
case class WitnessBlockElementAssetsElement(
`type`: String,
mimeType: Option[String],
file: Option[String],
typeData: Option[WitnessBlockElementAssetsElementTypeData],
)
object WitnessBlockElementAssetsElement {
implicit val w2Writes: Writes[WitnessBlockElementAssetsElement] =
Json.writes[WitnessBlockElementAssetsElement]
}
/** Payload of a `WitnessBlockElement`. The image, video and text variants are its subtypes in this file. */
sealed trait WitnessTypeData
/** Witness payload for an image contribution. Apart from `type`, every field is optional. */
case class WitnessTypeDataImage(
`type`: String,
url: Option[String],
originalUrl: Option[String],
witnessEmbedType: Option[String],
mediaId: Option[String],
source: Option[String],
title: Option[String],
authorName: Option[String],
authorUsername: Option[String],
authorWitnessProfileUrl: Option[String],
authorGuardianProfileUrl: Option[String],
caption: Option[String],
alt: Option[String],
html: Option[String],
apiUrl: Option[String],
photographer: Option[String],
dateCreated: Option[String],
) extends WitnessTypeData
object WitnessTypeDataImage {
// Shares the name `w3Writes` with the val in WitnessTypeDataVideo; they live in different companions.
implicit val w3Writes: Writes[WitnessTypeDataImage] = Json.writes[WitnessTypeDataImage]
}
/** Witness payload for a video contribution, including the `youtube*` fields. Apart from `type`,
  * every field is optional.
  */
case class WitnessTypeDataVideo(
`type`: String,
url: Option[String],
originalUrl: Option[String],
witnessEmbedType: Option[String],
source: Option[String],
title: Option[String],
description: Option[String],
authorName: Option[String],
authorUsername: Option[String],
authorWitnessProfileUrl: Option[String],
authorGuardianProfileUrl: Option[String],
width: Option[Int],
height: Option[Int],
html: Option[String],
apiUrl: Option[String],
dateCreated: Option[String],
youtubeUrl: Option[String],
youtubeSource: Option[String],
youtubeTitle: Option[String],
youtubeDescription: Option[String],
youtubeAuthorName: Option[String],
youtubeHtml: Option[String],
) extends WitnessTypeData
object WitnessTypeDataVideo {
// Shares the name `w3Writes` with the val in WitnessTypeDataImage; they live in different companions.
implicit val w3Writes: Writes[WitnessTypeDataVideo] = Json.writes[WitnessTypeDataVideo]
}
/** Witness payload for a text contribution. Apart from `type`, every field is optional.
  * The companion supplies the macro-derived play-json `Writes`.
  */
case class WitnessTypeDataText(
`type`: String,
url: Option[String],
originalUrl: Option[String],
witnessEmbedType: Option[String],
source: Option[String],
title: Option[String],
description: Option[String],
authorName: Option[String],
authorUsername: Option[String],
authorWitnessProfileUrl: Option[String],
authorGuardianProfileUrl: Option[String],
apiUrl: Option[String],
dateCreated: Option[String],
) extends WitnessTypeData
object WitnessTypeDataText {
implicit val WitnessTypeDataTextWrites: Writes[WitnessTypeDataText] = Json.writes[WitnessTypeDataText]
}
/** Guardian Witness embed: its `assets` and the typed payload (`witnessTypeData`), plus the
  * `ThirdPartyEmbeddedContent` members.
  */
case class WitnessBlockElement(
assets: Seq[WitnessBlockElementAssetsElement],
witnessTypeData: WitnessTypeData,
isThirdPartyTracking: Boolean,
source: Option[String],
sourceDomain: Option[String],
) extends PageElement
with ThirdPartyEmbeddedContent
object WitnessBlockElement {
// Writes for the sealed WitnessTypeData family, derived by the play-json macro from the subtype Writes.
// NOTE(review): the exact JSON shape (e.g. any type discriminator) is decided by play-json; confirm
// against what DCR expects.
implicit val w4Writes: Writes[WitnessTypeData] = Json.writes[WitnessTypeData]
// Declared after w4Writes, which it needs for the `witnessTypeData` field.
implicit val w5Writes: Writes[WitnessBlockElement] = Json.writes[WitnessBlockElement]
}
/** YouTube atom element: atom `id`, `assetId`, optional `channelId`, `mediaTitle`, the images
  * (`overrideImage`, `posterImage`), an `expired` flag, optional `duration` and `altText`.
  * The companion supplies the macro-derived play-json `Writes`.
  */
case class YoutubeBlockElement(
id: String,
assetId: String,
channelId: Option[String],
mediaTitle: String,
overrideImage: Option[String],
posterImage: Option[Seq[NSImage1]],
expired: Boolean,
duration: Option[Long],
altText: Option[String],
) extends PageElement
/*
The difference between `overrideImage` and `posterImage`:
When the `YoutubeBlockElement` is in the main media position, `overrideImage` is set to the main media image.
The reason is:
Since moving to Atoms, the multimedia team have commented that they're reluctant to use videos
in main media as it makes the content look stale. This is because an Atom only has 1 image. Before Atoms, it was
possible to set a different image for a video on each use. This change brings that functionality back.
source: https://github.com/guardian/frontend/pull/20637
In all cases `posterImage` carries the video's own images.
 */
object YoutubeBlockElement {
implicit val YoutubeBlockElementWrites: Writes[YoutubeBlockElement] = Json.writes[YoutubeBlockElement]
}
//noinspection ScalaStyle
object PageElement {
/** Tells whether `element` is of a type that gets sent on for rendering.
  *
  * Cross-reference the list with the elements supported by dotcom-rendering. Any type not named
  * here is reported as unsupported.
  *
  * @param element the page element to check
  * @return true for the listed element types and for tables flagged as mandatory, false otherwise
  */
def isSupported(element: PageElement): Boolean =
  element match {
    case _: AudioBlockElement | _: AudioAtomBlockElement | _: BlockquoteBlockElement |
        _: CalloutBlockElement | _: CalloutBlockElementV2 | _: CartoonBlockElement |
        _: ChartAtomBlockElement | _: CodeBlockElement | _: CommentBlockElement |
        _: ContentAtomBlockElement | _: DocumentBlockElement | _: EmbedBlockElement |
        _: ExplainerAtomBlockElement | _: GenericAtomBlockElement | _: GuideAtomBlockElement |
        _: GuVideoBlockElement | _: ImageBlockElement | _: InstagramBlockElement |
        _: InteractiveAtomBlockElement | _: InteractiveBlockElement | _: MapBlockElement |
        _: MediaAtomBlockElement | _: ProfileAtomBlockElement | _: PullquoteBlockElement |
        _: QABlockElement | _: QuizAtomBlockElement | _: RichLinkBlockElement |
        _: SoundcloudBlockElement | _: SpotifyBlockElement | _: SubheadingBlockElement |
        _: TextBlockElement | _: TimelineAtomBlockElement | _: TweetBlockElement |
        _: VideoBlockElement | _: VideoFacebookBlockElement | _: VideoVimeoBlockElement |
        _: VideoYoutubeBlockElement | _: YoutubeBlockElement | _: WitnessBlockElement |
        _: VineBlockElement | _: ListBlockElement | _: TimelineBlockElement =>
      true
    // TODO we should quick fail here for these rather than pointlessly go to DCR
    // A table counts as supported only when it is explicitly flagged as mandatory.
    case table: TableBlockElement => table.isMandatory.contains(true)
    case _ => false
  }
def make(
element: ApiBlockElement,
addAffiliateLinks: Boolean,
pageUrl: String,
atoms: Iterable[Atom],
isMainBlock: Boolean,
isImmersive: Boolean,
campaigns: Option[JsValue],
calloutsUrl: Option[String],
overrideImage: Option[ImageElement],
edition: Edition,
webPublicationDate: DateTime,
): List[PageElement] = {
def extractAtom: Option[Atom] =
for {
d <- element.contentAtomTypeData
atom <- atoms.find(_.id == d.atomId)
} yield atom
val elementRole: Option[String] =
for {
d <- element.contentAtomTypeData
role <- d.role
} yield role
element.`type` match {
case Text =>
val textCleaners =
TextCleaner.affiliateLinks(pageUrl, addAffiliateLinks) _ andThen
TextCleaner.sanitiseLinks(edition)
for {
block <- element.textTypeData.toList
text <- block.html.toList
element <- TextCleaner.split(text)
cleanedElement = (element._1, textCleaners(element._2))
} yield {
cleanedElement match {
case ("h2", heading) => SubheadingBlockElement(heading)
case ("blockquote", blockquote) => BlockquoteBlockElement(blockquote)
case (_, para) => TextBlockElement(para)
}
}
case Tweet =>
(for {
data <- element.tweetTypeData
id <- data.id
html <- data.html
url <- data.originalUrl
} yield {
TweetBlockElement(
html,
url,
id,
element.assets.nonEmpty,
Role(data.role),
containsThirdPartyTracking(element.tracking),
data.source,
data.sourceDomain,
)
}).toList
case RichLink =>
List(
RichLinkBlockElement(
element.richLinkTypeData.flatMap(_.originalUrl),
element.richLinkTypeData.flatMap(_.linkText),
element.richLinkTypeData.flatMap(_.linkPrefix),
Role(element.richLinkTypeData.flatMap(_.role)),
element.richLinkTypeData.flatMap(_.sponsorship).map(Sponsorship(_)),
),
)
case Cartoon => element.cartoonTypeData.flatMap(extractCartoon).toList
case Image =>
def ensureHTTPS(src: String): String = src.replace("http:", "https:")
val imageAssets = element.assets.zipWithIndex
.map { case (a, i) => ImageAsset.make(a, i) }
val imageRoleWidthsByBreakpoint =
if (isMainBlock) MainMedia
else if (isImmersive) ImmersiveMedia
else BodyMedia
val imageSources = imageRoleWidthsByBreakpoint.all.map { case (weighting, widths) =>
val srcSet: Seq[SrcSet] = widths.breakpoints.flatMap { b =>
Seq(
ImgSrc.srcsetForBreakpoint(
b,
imageRoleWidthsByBreakpoint.immersive.breakpoints,
maybeImageMedia = Some(ImageMedia(imageAssets.toSeq)),
),
ImgSrc.srcsetForBreakpoint(
b,
imageRoleWidthsByBreakpoint.immersive.breakpoints,
maybeImageMedia = Some(ImageMedia(imageAssets.toSeq)),
hidpi = true,
),
)
}.flatten
// A few very old articles use non-https hosts, which won't render
val httpsSrcSet = srcSet.map(set => set.copy(src = ensureHTTPS(set.src)))
ImageSource(weighting, httpsSrcSet)
}.toSeq
// The default role is used when an image doesn't have one and is then meant to be Inline,
// that having been said, there are exceptions to this rule.
// For instance, if the page is immersive and the picture is `mainMedia` and the image
// doesn't have a role, then the role should be Immersive, thereby overriding the default Inline
val defaultRole = (isMainBlock, isImmersive) match {
case (true, true) => Immersive
case _ => Inline
}
List(
ImageBlockElement(
ImageMedia(imageAssets.toSeq),
imageDataFor(element),
element.imageTypeData.flatMap(_.displayCredit),
Role(element.imageTypeData.flatMap(_.role), defaultRole),
imageSources,
),
)
case Audio => audioToPageElement(element).toList
case Video => {
def secureVideoHtmlUrls(html: String, element: ApiBlockElement): String = {
/*
Date: 04th September 2020
author: Pascal
Enhance HTML to process cases such as
<video data-media-id=\"gu-video-457132757\" class=\"gu-video\" controls=\"controls\" poster=\"http://static.guim.co.uk/sys-images/Guardian/Pix/audio/video/2015/6/11/1434025823959/KP_270483_crop_640x360.jpg\">
<source src=\"http://cdn.theguardian.tv/mainwebsite/2015/06/11/150611spacediary_desk.mp4\"/>
<source src=\"http://cdn.theguardian.tv/3gp/small/2015/06/11/150611spacediary_small.3gp\"/>
<source src=\"http://cdn.theguardian.tv/HLS/2015/06/11/150611spacediary.m3u8\"/>
<source src=\"http://cdn.theguardian.tv/3gp/large/2015/06/11/150611spacediary_large.3gp\"/>
<source src=\"http://cdn.theguardian.tv/webM/2015/06/11/150611spacediary_synd_768k_vp8.webm\"/>
</video>
Originally found at https://www.theguardian.com/books/2020/sep/02/top-10-books-about-space-travel-samantha-cristoforetti?dcr=false
We need to replace the links by secure links.
There are three ways to do this
1. Replace "http:" by "https:" in the HTML string; but that's a bit dangerous.
2. Replace "http://cdn.theguardian.tv" by "https://cdn.theguardian.tv"; but that's limiting
3. Replace all the unsecured links by the secure ones. This is perfect but the problem is to list the (unsecured) links
To achieve that we capitalise on the fact that the links are listed in element.assets
The outcome is
<video data-media-id=\"gu-video-457132757\" class=\"gu-video\" controls=\"controls\" poster=\"http://static.guim.co.uk/sys-images/Guardian/Pix/audio/video/2015/6/11/1434025823959/KP_270483_crop_640x360.jpg\">
<source src=\"https://cdn.theguardian.tv/mainwebsite/2015/06/11/150611spacediary_desk.mp4\"/>
<source src=\"https://cdn.theguardian.tv/3gp/small/2015/06/11/150611spacediary_small.3gp\"/>
<source src=\"https://cdn.theguardian.tv/HLS/2015/06/11/150611spacediary.m3u8\"/>
<source src=\"https://cdn.theguardian.tv/3gp/large/2015/06/11/150611spacediary_large.3gp\"/>
<source src=\"https://cdn.theguardian.tv/webM/2015/06/11/150611spacediary_synd_768k_vp8.webm\"/>
</video>
*/
element.assets.toList
.foldLeft(html) { (h, asset) =>
val url = asset.file.getOrElse("")
h.replaceAll(url, url.replace("http:", "https:"))
}
}
if (element.assets.nonEmpty) {
List(
GuVideoBlockElement(
element.assets.map(VideoAsset.make).toSeq,
ImageMedia(
element.assets
.filter(_.mimeType.exists(_.startsWith("image")))
.zipWithIndex
.map { case (a, i) =>
ImageAsset.make(a, i)
}
.toSeq,
),
element.videoTypeData.flatMap(_.caption).getOrElse(""),
element.videoTypeData.flatMap(_.url).getOrElse(""),
element.videoTypeData.flatMap(_.originalUrl).getOrElse(""),
secureVideoHtmlUrls(element.videoTypeData.flatMap(_.html).getOrElse(""), element),
element.videoTypeData.flatMap(_.source).getOrElse(""),
Role(element.videoTypeData.flatMap(_.role)),
),
)
} else videoToPageElement(element).toList
}
case Membership =>
element.membershipTypeData
.map(m =>
MembershipBlockElement(
m.originalUrl,
m.linkText,
m.linkPrefix,
m.title,
m.venue,
m.location,
m.identifier,
m.image,
m.price,
),
)
.toList
case Comment =>
(for {
c <- element.commentTypeData
html <- c.html
} yield {
CommentBlockElement(
body = CommentCleaner.getBody(html),
avatarURL = CommentCleaner.getAvatar(html),
dateTime = CommentCleaner.getDateTime(html),
permalink = c.originalUrl.getOrElse(""),
profileURL = c.authorUrl.getOrElse(""),
profileName = c.authorName.getOrElse(""),
)
}).toList
case Embed => embedToPageElement(element, campaigns, calloutsUrl).toList
// This process returns either:
// 1. SoundcloudBlockElement
// 2. EmbedBlockElement
// 3. CalloutBlockElement
case Callout =>
element.calloutTypeData
.map { callout =>
CalloutExtraction.extractCalloutByCampaignId(
callout,
campaigns,
calloutsUrl,
)
}
.flatten
.toList
case Contentatom =>
(extractAtom match {
case Some(audio: AudioAtom) => {
Some(
AudioAtomBlockElement(
id = audio.id,
kicker = audio.data.kicker,
title = audio.atom.title,
coverUrl = audio.data.coverUrl,
trackUrl = audio.data.trackUrl,
duration = audio.data.duration,
contentId = audio.data.contentId,
),
)
}
case Some(chart: ChartAtom) => {
val encodedId = URLEncoder.encode(chart.id, "UTF-8")
// chart.id is a uuid, so there is no real need to url-encode it but just to be safe
Some(
ChartAtomBlockElement(
id = chart.id,
url = s"${Configuration.ajax.url}/embed/atom/chart/$encodedId",
html = chart.html, // This is atom.defaultHtml
css = None, // hardcoded to None during experimental period
js = None, // hardcoded to None during experimental period
title = chart.title,
),
)
}
case Some(explainer: ExplainerAtom) => {
Some(ExplainerAtomBlockElement(explainer.id, explainer.title, explainer.body))
}
case Some(guide: GuideAtom) => {
val html = guide.data.items
.map(item => s"${item.title.map(t => s"<p><strong>${t}</strong></p>").getOrElse("")}${item.body}")
.mkString("")
Some(
GuideAtomBlockElement(
id = guide.id,
label = guide.data.typeLabel.getOrElse("Quick Guide"),
title = guide.atom.title.getOrElse(""),
img = guide.image.flatMap(ImgSrc.getAmpImageUrl),
html = html,
credit = guide.credit.getOrElse(""),
),
)
}
case Some(interactive: InteractiveAtom) => {
val isLegacy =
InteractiveSwitchOver.date.isAfter(Chronos.jodaDateTimeToJavaTimeDateTime(webPublicationDate))
val encodedId = URLEncoder.encode(interactive.id, "UTF-8")
Some(
InteractiveAtomBlockElement(
id = interactive.id,
url = s"${Configuration.ajax.url}/embed/atom/interactive/$encodedId",
// Note, we parse legacy interactives to do minimal cleaning of
// the HTML (e.g. to ensure all tags are closed). Some break
// without this. E.g.
// https://www.theguardian.com/info/ng-interactive/2021/mar/17/make-sense-of-the-week-with-australia-weekend.
html =
if (isLegacy) Some(Jsoup.parseBodyFragment(interactive.html).outerHtml)
else Some(interactive.html),
css = Some(interactive.css),
js = interactive.mainJS,
placeholderUrl = interactive.placeholderUrl,
role = elementRole,
title = interactive.title,
),
)
}
case Some(mediaAtom: MediaAtom) => {
val imageOverride = overrideImage.map(_.images).flatMap(Video700.bestSrcFor)
val altText = overrideImage.flatMap(_.images.allImages.headOption.flatMap(_.altText))
mediaAtom match {
case youtube if mediaAtom.activeAssets.headOption.exists(_.platform == MediaAssetPlatform.Youtube) => {
mediaAtom.activeAssets.headOption.map(asset => {
YoutubeBlockElement(
id = mediaAtom.id, // CAPI ID
assetId = asset.id, // Youtube ID
channelId = mediaAtom.channelId, // Channel ID
mediaTitle = mediaAtom.title, // Caption
overrideImage = if (isMainBlock) imageOverride else None,
posterImage = mediaAtom.posterImage.map(NSImage1.imageMediaToSequence),
expired = mediaAtom.expired.getOrElse(false),
duration = mediaAtom.duration, // Duration in seconds
altText = if (isMainBlock) altText else None,
)
})
}
case _ =>
Some(
MediaAtomBlockElement(
mediaAtom.id,
mediaAtom.title,
mediaAtom.defaultHtml,
mediaAtom.assets.map(MediaAtomBlockElementMediaAsset.fromMediaAsset),
mediaAtom.duration,
mediaAtom.posterImage.map(NSImage1.imageMediaToSequence),
mediaAtom.expired,
mediaAtom.activeVersion,
mediaAtom.channelId,
),
)
}
}
case Some(profile: ProfileAtom) => {
val html = profile.data.items
.map(item => s"${item.title.map(t => s"<p><strong>${t}</strong></p>").getOrElse("")}${item.body}")
.mkString("")
val items = profile.data.items.toList.map(item => ProfileAtomBlockElementItem(item.title, item.body))
Some(
ProfileAtomBlockElement(
id = profile.id,
label = profile.data.typeLabel.getOrElse("Profile"),
title = profile.atom.title.getOrElse(""),
img = profile.image.flatMap(ImgSrc.getAmpImageUrl),
html = html,
items = items,
credit = profile.credit.getOrElse(""),
),
)
}
case Some(qa: QandaAtom) => {
Some(
QABlockElement(
id = qa.id,
title = qa.atom.title.getOrElse(""),
img = qa.image.flatMap(ImgSrc.getAmpImageUrl),
html = qa.data.item.body,
credit = qa.credit.getOrElse(""),
),
)
}
case Some(timeline: TimelineAtom) => {
Some(
TimelineAtomBlockElement(
id = timeline.id,
title = timeline.atom.title.getOrElse(""),
description = timeline.data.description,
events = timeline.data.events
.map(event =>
TimelineAtomEvent(
title = event.title,
date = TimelineAtom.renderFormattedDate(event.date, event.dateFormat),
body = event.body,
toDate = event.toDate.map(date => TimelineAtom.renderFormattedDate(date, event.dateFormat)),
unixDate = event.date,
toUnixDate = event.toDate,
),
)
.toSeq,
),
)
}
case Some(quizAtom: QuizAtom) => {
val questions = quizAtom.content.questions.map { q =>
QuizAtomQuestion(
id = q.id,
text = q.text,
answers = q.answers.map(a =>
QuizAtomAnswer(
id = a.id,
text = a.text,
revealText = a.revealText,
answerBuckets = a.buckets,
isCorrect = a.weight == 1,
),
),
imageUrl = q.imageMedia.flatMap(i => ImgSrc.getAmpImageUrl(i.imageMedia)),
imageAlt = q.imageMedia
.flatMap(i => i.imageMedia.masterImage.flatMap(_.altText))
// Remove surrounding quotes from alt text, e.g
// "hello world" => hello world
.map(_.replaceAll("^\"|\"$", "")),
)
}
Some(
QuizAtomBlockElement(
id = quizAtom.id,
quizType = quizAtom.quizType,
questions = questions,
resultBuckets = quizAtom.content.resultBuckets.map { bucket =>
QuizAtomResultBucket(bucket.id, bucket.title, bucket.description)
},
resultGroups =
quizAtom.content.resultGroups.map(x => QuizAtomResultGroup(x.id, x.title, x.shareText, x.minScore)),
),
)
}
// Here we capture all the atom types which are not yet supported.
// ContentAtomBlockElement is mapped to null in the DCR source code.
case Some(atom) => Some(ContentAtomBlockElement(atom.id))
case _ => None
}).toList
case GuMap =>
{
for {
mapElem <- element.mapTypeData
originalUrl <- mapElem.originalUrl
source <- mapElem.source
html <- mapElem.html
embedUrl <- getIframeSrc(html)
width <- getIframeWidth(html)
height <- getIframeHeight(html)
caption = mapElem.caption.getOrElse("")
title = mapElem.title.getOrElse("")
thirdPartyTracking = containsThirdPartyTracking(element.tracking)
} yield MapBlockElement(
embedUrl,
originalUrl,
Some(source),
caption,
title,
width,
height,
thirdPartyTracking,
mapElem.sourceDomain,
)
}.toList
case Pullquote =>
element.pullquoteTypeData
.map(d =>
PullquoteBlockElement(
d.html,
Role(d.role),
d.attribution,
containsThirdPartyTracking(element.tracking),
d.source,
d.sourceDomain,
),
)
.toList
case Interactive =>
element.interactiveTypeData
.map(d =>
InteractiveBlockElement(d.iframeUrl, d.alt, d.scriptUrl.map(ensureHTTPS), d.role, d.isMandatory, d.caption),
)
.toList
case Table => element.tableTypeData.map(d => TableBlockElement(d.html, Role(d.role), d.isMandatory)).toList
case Witness => {
(for {
wtd <- element.witnessTypeData
embedType <- wtd.witnessEmbedType
} yield {
embedType match {
case "image" => Some(makeWitnessBlockElementImage(element, wtd))
case "video" => Some(makeWitnessBlockElementVideo(element, wtd))
case "text" => Some(makeWitnessBlockElementText(element, wtd))
case _ => None
}
}).toList.flatten
}
case Document =>
element.documentTypeData
.map(d =>
DocumentBlockElement(
embedUrl = getEmbedUrl(d.html),
height = d.height,
width = d.width,
title = d.title,
isMandatory = d.isMandatory,
isThirdPartyTracking = containsThirdPartyTracking(element.tracking),
source = d.source,
sourceDomain = d.sourceDomain,
),
)
.toList
case Instagram =>
element.instagramTypeData
.map(d =>
InstagramBlockElement(
d.originalUrl,
d.html,
d.caption.isDefined,
containsThirdPartyTracking(element.tracking),
Some(d.source),
d.sourceDomain,
),
)
.toList
case Vine =>
(for {
fields <- element.vineTypeData
html <- fields.html
iframeSrc <- getIframeSrc(html)
} yield {
VineBlockElement(
iframeSrc,
getIframeHeight(html).getOrElse(0),
getIframeWidth(html).getOrElse(0),
fields.originalUrl,
fields.title,
containsThirdPartyTracking(element.tracking),
Some(fields.source),
fields.sourceDomain,
fields.role,
)
}).toList
case Code => {
(for {
data <- element.codeTypeData
} yield {
CodeBlockElement(data.html, data.language, false)
}).toList
}
case Form => List(FormBlockElement(None))
case GuList =>
element.listTypeData.map { listTypeData =>
ListBlockElement(
items = listTypeData.items.map { item =>
makeListItem(
addAffiliateLinks,
pageUrl,
atoms,
isImmersive,
campaigns,
calloutsUrl,
edition,
webPublicationDate,
item,
)
}.toSeq,
listElementType = listTypeData.`type`.map(_.name),
)
}.toList
case Timeline =>
element.timelineTypeData.map { timelineTypeData =>
TimelineBlockElement(
sections = makeTimelineSection(
addAffiliateLinks,
pageUrl,
atoms,
isImmersive,
campaigns,
calloutsUrl,
edition,
webPublicationDate,
timelineTypeData,
),
)
}.toList
case EnumUnknownElementType(f) => List(UnknownBlockElement(None))
case _ => Nil
}
}
/** Converts the sections of a CAPI timeline element into [[TimelineSection]]s.
  *
  * Each event's optional `main` block and every one of its `body` blocks is passed through
  * `PageElement.make`. For `main`, only the first resulting page element (if any) is kept;
  * for `body`, all resulting page elements are kept in order.
  */
private def makeTimelineSection(
    addAffiliateLinks: Boolean,
    pageUrl: String,
    atoms: Iterable[Atom],
    isImmersive: Boolean,
    campaigns: Option[JsValue],
    calloutsUrl: Option[String],
    edition: Edition,
    webPublicationDate: DateTime,
    timelineTypeData: TimelineElementFields,
) = {
  // Single call site for PageElement.make: only the block and the main-block flag differ
  // between the `main` and `body` conversions; no image override is ever supplied.
  def toPageElements(block: ApiBlockElement, asMainBlock: Boolean) =
    PageElement.make(
      block,
      addAffiliateLinks,
      pageUrl,
      atoms,
      isMainBlock = asMainBlock,
      isImmersive,
      campaigns,
      calloutsUrl,
      overrideImage = None,
      edition,
      webPublicationDate,
    )

  val convertedSections = for (section <- timelineTypeData.sections) yield {
    val convertedEvents = for (event <- section.events) yield {
      TimelineEvent(
        title = event.title,
        date = event.date,
        label = event.label,
        main = event.main.flatMap(mainBlock => toPageElements(mainBlock, asMainBlock = true).headOption),
        body = event.body.flatMap(bodyBlock => toPageElements(bodyBlock, asMainBlock = false)).toSeq,
      )
    }
    TimelineSection(title = section.title, events = convertedEvents.toSeq)
  }
  convertedSections.toSeq
}
/** Converts a CAPI list item into a [[ListItem]].
  *
  * Every element of the item is passed through `PageElement.make` as a non-main block with
  * no image override, and the resulting page elements are concatenated in order. All other
  * fields are copied from the CAPI item unchanged.
  */
private def makeListItem(
    addAffiliateLinks: Boolean,
    pageUrl: String,
    atoms: Iterable[Atom],
    isImmersive: Boolean,
    campaigns: Option[JsValue],
    calloutsUrl: Option[String],
    edition: Edition,
    webPublicationDate: DateTime,
    item: v1.ListItem,
) = {
  val pageElements = item.elements.flatMap { block =>
    PageElement.make(
      block,
      addAffiliateLinks,
      pageUrl,
      atoms,
      isMainBlock = false,
      isImmersive,
      campaigns,
      calloutsUrl,
      overrideImage = None,
      edition,
      webPublicationDate,
    )
  }

  ListItem(
    elements = pageElements.toSeq,
    title = item.title,
    bio = item.bio,
    endNote = item.endNote,
    contributorIds = item.contributorIds,
    byline = item.byline,
    bylineHtml = item.bylineHtml,
    contributorImageOverrideUrl = item.contributorImageOverrideUrl,
  )
}
/** Upgrades a URL that starts with the `http://` scheme to `https://`.
  *
  * Only a leading, lower-case `http://` is rewritten; any other URL is returned unchanged.
  */
private[this] def ensureHTTPS(url: String): String = {
  val insecureScheme = "http://"
  if (!url.startsWith(insecureScheme)) url
  else s"https://${url.drop(insecureScheme.length)}"
}
/** Maps every asset of a CAPI block element to a [[WitnessBlockElementAssetsElement]].
  *
  * The asset type is rendered with `toString()`, the mime type and file are copied as-is,
  * and only the `name` of the asset's type data (when present) is carried over.
  */
private def makeWitnessAssets(element: ApiBlockElement): Seq[WitnessBlockElementAssetsElement] = {
  val witnessAssets = for (asset <- element.assets) yield {
    WitnessBlockElementAssetsElement(
      asset.`type`.toString(),
      asset.mimeType,
      asset.file,
      asset.typeData.map(fields => WitnessBlockElementAssetsElementTypeData(fields.name)),
    )
  }
  witnessAssets.toSeq
}
/** Builds the [[WitnessBlockElement]] for a witness embed whose type data is tagged `"image"`.
  *
  * @param element the CAPI block element supplying the assets and tracking information
  * @param wtd     the witness fields copied into the image-specific type data
  */
private def makeWitnessBlockElementImage(element: ApiBlockElement, wtd: WitnessElementFields): WitnessBlockElement = {
  val assets = makeWitnessAssets(element)

  val imageTypeData = WitnessTypeDataImage(
    `type` = "image",
    url = wtd.url,
    originalUrl = wtd.originalUrl,
    witnessEmbedType = wtd.witnessEmbedType,
    mediaId = wtd.mediaId,
    source = wtd.source,
    title = wtd.title,
    authorName = wtd.authorName,
    authorUsername = wtd.authorUsername,
    authorWitnessProfileUrl = wtd.authorWitnessProfileUrl,
    authorGuardianProfileUrl = wtd.authorGuardianProfileUrl,
    caption = wtd.caption,
    alt = wtd.alt,
    html = wtd.html,
    apiUrl = wtd.apiUrl,
    photographer = wtd.photographer,
    // The creation date is passed on as its ISO 8601 string.
    dateCreated = wtd.dateCreated.map(_.iso8601),
  )

  val isThirdPartyTracking = containsThirdPartyTracking(element.tracking)

  WitnessBlockElement(assets, imageTypeData, isThirdPartyTracking, wtd.source, wtd.sourceDomain)
}
/** Builds the [[WitnessBlockElement]] for a witness embed whose type data is tagged `"video"`.
  *
  * @param element the CAPI block element supplying the assets and tracking information
  * @param wtd     the witness fields copied into the video-specific type data
  */
private def makeWitnessBlockElementVideo(element: ApiBlockElement, wtd: WitnessElementFields): WitnessBlockElement = {
  val assets = makeWitnessAssets(element)

  val videoTypeData = WitnessTypeDataVideo(
    `type` = "video",
    url = wtd.url,
    originalUrl = wtd.originalUrl,
    witnessEmbedType = wtd.witnessEmbedType,
    source = wtd.source,
    title = wtd.title,
    description = wtd.description,
    authorName = wtd.authorName,
    authorUsername = wtd.authorUsername,
    authorWitnessProfileUrl = wtd.authorWitnessProfileUrl,
    authorGuardianProfileUrl = wtd.authorGuardianProfileUrl,
    width = wtd.width,
    height = wtd.height,
    html = wtd.html,
    apiUrl = wtd.apiUrl,
    // The creation date is passed on as its ISO 8601 string.
    dateCreated = wtd.dateCreated.map(_.iso8601),
    youtubeUrl = wtd.youtubeUrl,
    youtubeSource = wtd.youtubeSource,
    youtubeTitle = wtd.youtubeTitle,
    youtubeDescription = wtd.youtubeDescription,
    youtubeAuthorName = wtd.youtubeAuthorName,
    youtubeHtml = wtd.youtubeHtml,
  )

  val isThirdPartyTracking = containsThirdPartyTracking(element.tracking)

  WitnessBlockElement(assets, videoTypeData, isThirdPartyTracking, wtd.source, wtd.sourceDomain)
}
/** Builds the [[WitnessBlockElement]] for a witness embed whose type data is tagged `"text"`.
  *
  * @param element the CAPI block element supplying the assets and tracking information
  * @param wtd     the witness fields copied into the text-specific type data
  */
private def makeWitnessBlockElementText(element: ApiBlockElement, wtd: WitnessElementFields): WitnessBlockElement = {
  val assets = makeWitnessAssets(element)

  val textTypeData = WitnessTypeDataText(
    `type` = "text",
    url = wtd.url,
    originalUrl = wtd.originalUrl,
    witnessEmbedType = wtd.witnessEmbedType,
    source = wtd.source,
    title = wtd.title,
    description = wtd.description,
    authorName = wtd.authorName,
    authorUsername = wtd.authorUsername,
    authorWitnessProfileUrl = wtd.authorWitnessProfileUrl,
    authorGuardianProfileUrl = wtd.authorGuardianProfileUrl,
    apiUrl = wtd.apiUrl,
    // The creation date is passed on as its ISO 8601 string.
    dateCreated = wtd.dateCreated.map(_.iso8601),
  )

  val isThirdPartyTracking = containsThirdPartyTracking(element.tracking)

  WitnessBlockElement(assets, textTypeData, isThirdPartyTracking, wtd.source, wtd.sourceDomain)
}
/** Returns the `src` attribute of the first `<iframe>` found in the given HTML fragment.
  *
  * The result is `None` when the fragment contains no iframe at all.
  */
private[this] def getIframeSrc(html: String): Option[String] = {
  val iframes = Jsoup.parseBodyFragment(html).getElementsByTag("iframe").asScala
  for (firstIframe <- iframes.headOption) yield firstIframe.attr("src")
}
/** Reads the `width` attribute of the first `<iframe>` in the given HTML fragment.
  *
  * @param html     HTML fragment to inspect
  * @param fallback value used when an iframe exists but its width is not a valid integer
  * @return `None` when there is no iframe; otherwise the parsed width or `fallback`
  */
private[this] def getIframeWidth(html: String, fallback: Int = 0): Option[Int] = {
  val iframes = Jsoup.parseBodyFragment(html).getElementsByTag("iframe").asScala
  for (firstIframe <- iframes.headOption) yield {
    val rawWidth = firstIframe.attr("width")
    Try(rawWidth.toInt).getOrElse(fallback)
  }
}
/** Reads the `height` attribute of the first `<iframe>` in the given HTML fragment.
  *
  * @param html     HTML fragment to inspect
  * @param fallback value used when an iframe exists but its height is not a valid integer
  * @return `None` when there is no iframe; otherwise the parsed height or `fallback`
  */
private[this] def getIframeHeight(html: String, fallback: Int = 0): Option[Int] = {
  val iframes = Jsoup.parseBodyFragment(html).getElementsByTag("iframe").asScala
  for (firstIframe <- iframes.headOption) yield {
    val rawHeight = firstIframe.attr("height")
    Try(rawHeight.toInt).getOrElse(fallback)
  }
}
/** Attempts to interpret an embed's HTML as a Soundcloud track or playlist.
  *
  * The `src` of the first iframe is handed to `SoundcloudHelper`. A track id takes precedence
  * over a playlist id. The result is `None` when there is no iframe or when neither id can
  * be obtained from its `src`.
  */
private def extractSoundcloudBlockElement(
    html: String,
    isMandatory: Boolean,
    thirdPartyTracking: Boolean,
    source: Option[String],
    sourceDomain: Option[String],
): Option[SoundcloudBlockElement] =
  getIframeSrc(html).flatMap { iframeSrc =>
    // Both lookups run up front, track first, before choosing which one wins.
    val trackId = SoundcloudHelper.getTrackIdFromUrl(iframeSrc)
    val playlistId = SoundcloudHelper.getPlaylistIdFromUrl(iframeSrc)

    val asTrack = trackId.map { track =>
      SoundcloudBlockElement(html, track, isTrack = true, isMandatory, thirdPartyTracking, source, sourceDomain)
    }
    asTrack.orElse {
      playlistId.map { playlist =>
        SoundcloudBlockElement(
          html,
          playlist,
          isTrack = false,
          isMandatory,
          thirdPartyTracking,
          source,
          sourceDomain,
        )
      }
    }
  }
/** Produces an [[EmbedBlockElement]] only when the first iframe in `html` points at
  * `charts-datawrapper.s3.amazonaws.com`; otherwise returns `None`.
  */
private def extractChartDatawrapperEmbedBlockElement(
    html: String,
    role: Option[String],
    thirdPartyTracking: Boolean,
    source: Option[String],
    sourceDomain: Option[String],
    caption: Option[String],
): Option[EmbedBlockElement] =
  getIframeSrc(html)
    .filter(iframeSrc => iframeSrc.contains("charts-datawrapper.s3.amazonaws.com"))
    .map { _ =>
      EmbedBlockElement(html, None, None, false, role, thirdPartyTracking, source, sourceDomain, caption)
    }
/** Produces an [[EmbedBlockElement]] for any HTML that contains an iframe, whatever its
  * source. Intended as the catch-all for iframes that
  * `extractChartDatawrapperEmbedBlockElement` did not capture. Returns `None` when `html`
  * has no iframe.
  */
private def extractGenericEmbedBlockElement(
    html: String,
    role: Option[String],
    thirdPartyTracking: Boolean,
    source: Option[String],
    sourceDomain: Option[String],
    caption: Option[String],
): Option[EmbedBlockElement] =
  // The iframe src itself is not used; its presence is the only condition.
  getIframeSrc(html).map { _ =>
    EmbedBlockElement(html, None, None, false, role, thirdPartyTracking, source, sourceDomain, caption)
  }
/** Builds a [[SpotifyBlockElement]] from an audio element whose iframe points at Spotify.
  *
  * Returns `None` when the element has no audio type data, no HTML, no iframe, or when the
  * iframe `src` does not contain `spotify.com`. When the iframe dimensions cannot be parsed,
  * the height falls back to 540 and the width to 460.
  */
private def extractSpotifyBlockElement(
    element: ApiBlockElement,
    thirdPartyTracking: Boolean,
): Option[SpotifyBlockElement] =
  element.audioTypeData.flatMap { audioData =>
    audioData.html.flatMap { markup =>
      getIframeSrc(markup)
        // Deciding if the source is Spotify. We cannot rely on audioData.source due to lack of
        // data integrity: some self-described Spotify elements are actually
        // charts-datawrapper.s3.amazonaws.com
        .filter(iframeSrc => iframeSrc.contains("spotify.com"))
        .map { _ =>
          SpotifyBlockElement(
            getEmbedUrl(audioData.html),
            getIframeHeight(markup, fallback = 540),
            getIframeWidth(markup, fallback = 460),
            audioData.title,
            audioData.caption,
            thirdPartyTracking,
            audioData.source,
            audioData.sourceDomain,
            Role(audioData.role),
          )
        }
    }
  }
/*
  comment id: 2e5ac4fd-e7f1-4c04-bdcd-ceadd2dc5d4c

  Audio is a versatile carrier. It carries both audio and, incorrectly, non-audio content
  (in legacy content). audioToPageElement transforms an Audio element into the appropriate
  PageElement, which is one of:
    1. SoundcloudBlockElement
    2. SpotifyBlockElement
    3. EmbedBlockElement
    4. AudioBlockElement

  The extractors are tried in that order against the element's HTML; the first one that
  yields a result wins. AudioBlockElement is used when there is no HTML or no extractor
  matches. Nothing is returned when the element has no audio type data.

  Note: EmbedBlockElement is returned by both extractChartDatawrapperEmbedBlockElement and
  extractGenericEmbedBlockElement. The former catches charts from
  charts-datawrapper.s3.amazonaws.com while the latter captures any iframe.
 */
private def audioToPageElement(element: ApiBlockElement) = {
  element.audioTypeData.map { audioData =>
    val isMandatory = true
    val tracks = containsThirdPartyTracking(element.tracking)

    val fromMarkup = audioData.html.flatMap { markup =>
      extractSoundcloudBlockElement(markup, isMandatory, tracks, audioData.source, audioData.sourceDomain)
        .orElse(extractSpotifyBlockElement(element, tracks))
        .orElse(
          extractChartDatawrapperEmbedBlockElement(
            markup,
            audioData.role,
            tracks,
            audioData.source,
            audioData.sourceDomain,
            audioData.caption,
          ),
        )
        .orElse(
          extractGenericEmbedBlockElement(
            markup,
            audioData.role,
            tracks,
            audioData.source,
            audioData.sourceDomain,
            audioData.caption,
          ),
        )
    }

    fromMarkup.getOrElse {
      AudioBlockElement(element.assets.toList.map(asset => AudioAsset.make(asset, Some(audioData))))
    }
  }
}
/** Converts a CAPI embed element into a page element.
  *
  * Returns `None` when the element has no embed type data or no HTML. HTML recognised by
  * `CalloutExtraction.isCallout` is delegated to `CalloutExtraction.extractCallout`. Any
  * other HTML becomes a [[SoundcloudBlockElement]] when it can be extracted as one, and a
  * plain [[EmbedBlockElement]] otherwise.
  */
private def embedToPageElement(
    element: ApiBlockElement,
    campaigns: Option[JsValue],
    calloutsUrl: Option[String],
): Option[PageElement] =
  element.embedTypeData.flatMap { embedData =>
    embedData.html.flatMap { markup =>
      val isMandatory = embedData.isMandatory.getOrElse(false)
      val tracks = containsThirdPartyTracking(element.tracking)
      val isCallout = CalloutExtraction.isCallout(markup)

      val converted: Option[PageElement] =
        if (isCallout) {
          CalloutExtraction.extractCallout(markup, campaigns, calloutsUrl)
        } else {
          val soundcloud =
            extractSoundcloudBlockElement(markup, isMandatory, tracks, embedData.source, embedData.sourceDomain)
          Some(
            soundcloud.getOrElse(
              EmbedBlockElement(
                markup,
                embedData.safeEmbedCode,
                embedData.alt,
                isMandatory,
                embedData.role,
                tracks,
                embedData.source,
                embedData.sourceDomain,
                embedData.caption,
              ),
            ),
          )
        }
      converted
    }
  }
/** Collects the image fields that are present into a string map.
  *
  * The possible keys are `copyright`, `alt`, `caption` and `credit`; a key is included only
  * when the corresponding field is defined. An element without image type data yields an
  * empty map.
  */
private def imageDataFor(element: ApiBlockElement): Map[String, String] =
  element.imageTypeData match {
    case Some(fields) =>
      val candidates = Map(
        "copyright" -> fields.copyright,
        "alt" -> fields.alt,
        "caption" -> fields.caption,
        "credit" -> fields.credit,
      )
      candidates.collect { case (key, Some(value)) => key -> value }
    case None => Map()
  }
/** Returns the `src` of the first iframe in the optional HTML, or `None` when the HTML is
  * absent or contains no iframe.
  */
private def getEmbedUrl(html: Option[String]): Option[String] =
  html.flatMap(markup => getIframeSrc(markup))
/** Converts a CAPI video element into a provider-specific video page element.
  *
  * The element's `source` (compared case-insensitively) selects the result type:
  * `youtube`, `vimeo` and `facebook` map to their dedicated block elements, which also carry
  * the embed URL taken from the first iframe of the element's HTML; any other source maps
  * to a plain [[VideoBlockElement]].
  *
  * @param element the CAPI block element to convert
  * @return `None` when the video type data, its `source`, `originalUrl`, `height` or `width`
  *         is missing; otherwise the converted page element. `url` falls back to
  *         `originalUrl` when absent.
  */
private def videoToPageElement(element: ApiBlockElement): Option[PageElement] = {
  for {
    data <- element.videoTypeData
    source <- data.source
    caption = data.caption
    title = data.title
    originalUrl <- data.originalUrl
    height <- data.height
    width <- data.width
    url = data.url.getOrElse(originalUrl)
    thirdPartyTracking = containsThirdPartyTracking(element.tracking)
  } yield {
    // Lower-case with Locale.ROOT so the match does not depend on the JVM default locale
    // (e.g. under a Turkish locale "VIMEO".toLowerCase yields "vımeo" and would not match).
    source.toLowerCase(java.util.Locale.ROOT) match {
      case "youtube" =>
        VideoYoutubeBlockElement(
          caption,
          title,
          url,
          originalUrl,
          getEmbedUrl(data.html),
          height,
          width,
          Role(data.role),
          thirdPartyTracking,
          data.source,
          data.sourceDomain,
        )
      case "vimeo" =>
        VideoVimeoBlockElement(
          caption,
          title,
          url,
          originalUrl,
          getEmbedUrl(data.html),
          height,
          width,
          Role(data.role),
          thirdPartyTracking,
          data.source,
          data.sourceDomain,
        )
      case "facebook" =>
        VideoFacebookBlockElement(
          caption,
          title,
          url,
          originalUrl,
          getEmbedUrl(data.html),
          height,
          width,
          Role(data.role),
          thirdPartyTracking,
          data.source,
          data.sourceDomain,
        )
      case _ =>
        // Unrecognised providers get the generic element, which has no embed URL.
        VideoBlockElement(
          caption,
          title,
          url,
          originalUrl,
          height,
          width,
          Role(data.role),
          thirdPartyTracking,
          data.source,
          data.sourceDomain,
        )
    }
  }
}
/** Tells whether an embed is considered to track the reader.
  *
  * Returns `false` when no tracking information is present or when it is `DoesNotTrack`;
  * every other tracking value yields `true`.
  */
private[pageElements] def containsThirdPartyTracking(embedTracking: Option[EmbedTracking]): Boolean =
  embedTracking.exists(tracking => tracking.tracks != DoesNotTrack)
/*
Note: The JSON serialization of `PageElement`s includes a "_type" attribute, which is a crucial part of how DCR
recognises and parses `BlockElement`s. This attribute is added by Play Framework itself.
See: https://www.playframework.com/documentation/2.7.x/ScalaJsonAutomated#Requirements
TODO:
Because this attribute is de facto part of the frontend/DCR data model contract, it would be nice to stop
relying on the framework to provide it (for safety).
*/
implicit val pageElementWrites: Writes[PageElement] = Json.writes[PageElement]
}