in common/app/model/dotcomrendering/pageElements/PageElement.scala [894:1508]
/** Converts a single CAPI block element into zero or more page elements.
  *
  * The conversion dispatches on `element.type`. Most arms read the matching `*TypeData` field of the
  * element and return an empty list when that data (or a field the arm requires) is absent.
  * Element types with no dedicated arm yield `Nil`; `EnumUnknownElementType` yields a single
  * `UnknownBlockElement(None)`.
  *
  * @param element            the block element to convert
  * @param addAffiliateLinks  forwarded to `TextCleaner.affiliateLinks` for text elements and to the
  *                           list / timeline builders
  * @param pageUrl            forwarded to `TextCleaner.affiliateLinks` and to the list / timeline builders
  * @param atoms              atoms searched by id when the element is a content atom
  * @param isMainBlock        selects main-media image widths, contributes to the default image role and
  *                           gates the use of `overrideImage` for YouTube atoms
  * @param isImmersive        selects immersive image widths (when not a main block) and contributes to the
  *                           default image role
  * @param campaigns          forwarded to embed / callout extraction and to the list / timeline builders
  * @param calloutsUrl        forwarded to embed / callout extraction and to the list / timeline builders
  * @param overrideImage      optional image whose source and alt text replace the YouTube poster when
  *                           `isMainBlock` is true
  * @param edition            forwarded to `TextCleaner.sanitiseLinks` and to the list / timeline builders
  * @param webPublicationDate compared with `InteractiveSwitchOver.date` to decide whether an interactive
  *                           atom is treated as legacy
  * @return the page elements produced for `element`, possibly empty
  */
def make(
    element: ApiBlockElement,
    addAffiliateLinks: Boolean,
    pageUrl: String,
    atoms: Iterable[Atom],
    isMainBlock: Boolean,
    isImmersive: Boolean,
    campaigns: Option[JsValue],
    calloutsUrl: Option[String],
    overrideImage: Option[ImageElement],
    edition: Edition,
    webPublicationDate: DateTime,
): List[PageElement] = {

  // The atom referenced by this element's content-atom data, if it is present in `atoms`.
  def extractAtom: Option[Atom] =
    for {
      d <- element.contentAtomTypeData
      atom <- atoms.find(_.id == d.atomId)
    } yield atom

  // The role declared on this element's content-atom data, if any.
  val elementRole: Option[String] =
    for {
      d <- element.contentAtomTypeData
      role <- d.role
    } yield role

  element.`type` match {

    case Text =>
      val textCleaners =
        TextCleaner.affiliateLinks(pageUrl, addAffiliateLinks) _ andThen
          TextCleaner.sanitiseLinks(edition)

      for {
        block <- element.textTypeData.toList
        text <- block.html.toList
        part <- TextCleaner.split(text)
        cleanedElement = (part._1, textCleaners(part._2))
      } yield {
        cleanedElement match {
          case ("h2", heading)            => SubheadingBlockElement(heading)
          case ("blockquote", blockquote) => BlockquoteBlockElement(blockquote)
          case (_, para)                  => TextBlockElement(para)
        }
      }

    case Tweet =>
      // A tweet is only emitted when id, html and originalUrl are all present.
      (for {
        data <- element.tweetTypeData
        id <- data.id
        html <- data.html
        url <- data.originalUrl
      } yield {
        TweetBlockElement(
          html,
          url,
          id,
          element.assets.nonEmpty,
          Role(data.role),
          containsThirdPartyTracking(element.tracking),
          data.source,
          data.sourceDomain,
        )
      }).toList

    case RichLink =>
      List(
        RichLinkBlockElement(
          element.richLinkTypeData.flatMap(_.originalUrl),
          element.richLinkTypeData.flatMap(_.linkText),
          element.richLinkTypeData.flatMap(_.linkPrefix),
          Role(element.richLinkTypeData.flatMap(_.role)),
          element.richLinkTypeData.flatMap(_.sponsorship).map(Sponsorship(_)),
        ),
      )

    case Cartoon => element.cartoonTypeData.flatMap(extractCartoon).toList

    case Image =>
      def ensureHTTPS(src: String): String = src.replace("http:", "https:")

      val imageAssets = element.assets.zipWithIndex
        .map { case (a, i) => ImageAsset.make(a, i) }

      val imageRoleWidthsByBreakpoint =
        if (isMainBlock) MainMedia
        else if (isImmersive) ImmersiveMedia
        else BodyMedia

      val imageSources = imageRoleWidthsByBreakpoint.all.map { case (weighting, widths) =>
        // For every breakpoint, build both the standard and the hidpi source set.
        val srcSet: Seq[SrcSet] = widths.breakpoints.flatMap { b =>
          Seq(
            ImgSrc.srcsetForBreakpoint(
              b,
              imageRoleWidthsByBreakpoint.immersive.breakpoints,
              maybeImageMedia = Some(ImageMedia(imageAssets.toSeq)),
            ),
            ImgSrc.srcsetForBreakpoint(
              b,
              imageRoleWidthsByBreakpoint.immersive.breakpoints,
              maybeImageMedia = Some(ImageMedia(imageAssets.toSeq)),
              hidpi = true,
            ),
          )
        }.flatten
        // A few very old articles use non-https hosts, which won't render
        val httpsSrcSet = srcSet.map(set => set.copy(src = ensureHTTPS(set.src)))
        ImageSource(weighting, httpsSrcSet)
      }.toSeq

      // The default role is used when an image doesn't have one and is then meant to be Inline,
      // that having been said, there are exceptions to this rule.
      // For instance, if the page is immersive and the picture is `mainMedia` and the image
      // doesn't have a role, then the role should be Immersive, thereby overriding the default Inline
      val defaultRole = (isMainBlock, isImmersive) match {
        case (true, true) => Immersive
        case _            => Inline
      }

      List(
        ImageBlockElement(
          ImageMedia(imageAssets.toSeq),
          imageDataFor(element),
          element.imageTypeData.flatMap(_.displayCredit),
          Role(element.imageTypeData.flatMap(_.role), defaultRole),
          imageSources,
        ),
      )

    case Audio => audioToPageElement(element).toList

    case Video => {
      def secureVideoHtmlUrls(html: String, element: ApiBlockElement): String = {
        /*
          Date: 04th September 2020
          author: Pascal

          Enhance HTML to process cases such as

          <video data-media-id=\"gu-video-457132757\" class=\"gu-video\" controls=\"controls\" poster=\"http://static.guim.co.uk/sys-images/Guardian/Pix/audio/video/2015/6/11/1434025823959/KP_270483_crop_640x360.jpg\">
            <source src=\"http://cdn.theguardian.tv/mainwebsite/2015/06/11/150611spacediary_desk.mp4\"/>
            <source src=\"http://cdn.theguardian.tv/3gp/small/2015/06/11/150611spacediary_small.3gp\"/>
            <source src=\"http://cdn.theguardian.tv/HLS/2015/06/11/150611spacediary.m3u8\"/>
            <source src=\"http://cdn.theguardian.tv/3gp/large/2015/06/11/150611spacediary_large.3gp\"/>
            <source src=\"http://cdn.theguardian.tv/webM/2015/06/11/150611spacediary_synd_768k_vp8.webm\"/>
          </video>

          Originally found at https://www.theguardian.com/books/2020/sep/02/top-10-books-about-space-travel-samantha-cristoforetti?dcr=false

          We need to replace the links by secure links.

          There are three ways to do this
            1. Replace "http:" by "https:" in the HTML string; but that's a bit dangerous.
            2. Replace "http://cdn.theguardian.tv" by "https://cdn.theguardian.tv"; but that's limiting
            3. Replace all the unsecured links by the secure ones. This is perfect but the problem is to list the (unsecured) links

          To achieve that we capitalise on the fact that the links are listed in element.assets

          The outcome is

          <video data-media-id=\"gu-video-457132757\" class=\"gu-video\" controls=\"controls\" poster=\"http://static.guim.co.uk/sys-images/Guardian/Pix/audio/video/2015/6/11/1434025823959/KP_270483_crop_640x360.jpg\">
            <source src=\"https://cdn.theguardian.tv/mainwebsite/2015/06/11/150611spacediary_desk.mp4\"/>
            <source src=\"https://cdn.theguardian.tv/3gp/small/2015/06/11/150611spacediary_small.3gp\"/>
            <source src=\"https://cdn.theguardian.tv/HLS/2015/06/11/150611spacediary.m3u8\"/>
            <source src=\"https://cdn.theguardian.tv/3gp/large/2015/06/11/150611spacediary_large.3gp\"/>
            <source src=\"https://cdn.theguardian.tv/webM/2015/06/11/150611spacediary_synd_768k_vp8.webm\"/>
          </video>
         */
        element.assets.toList
          .flatMap(_.file)
          .filter(_.nonEmpty)
          .foldLeft(html) { (h, url) =>
            // Use the literal `replace`, not `replaceAll`: an asset URL is not a regular expression.
            // Characters such as '.', '?', '+' or '(' would otherwise change what is matched (or throw),
            // and '$' / '\' in the replacement would be read as group references.
            h.replace(url, url.replace("http:", "https:"))
          }
      }

      if (element.assets.nonEmpty) {
        List(
          GuVideoBlockElement(
            element.assets.map(VideoAsset.make).toSeq,
            ImageMedia(
              element.assets
                .filter(_.mimeType.exists(_.startsWith("image")))
                .zipWithIndex
                .map { case (a, i) =>
                  ImageAsset.make(a, i)
                }
                .toSeq,
            ),
            element.videoTypeData.flatMap(_.caption).getOrElse(""),
            element.videoTypeData.flatMap(_.url).getOrElse(""),
            element.videoTypeData.flatMap(_.originalUrl).getOrElse(""),
            secureVideoHtmlUrls(element.videoTypeData.flatMap(_.html).getOrElse(""), element),
            element.videoTypeData.flatMap(_.source).getOrElse(""),
            Role(element.videoTypeData.flatMap(_.role)),
          ),
        )
      } else videoToPageElement(element).toList
    }

    case Membership =>
      element.membershipTypeData
        .map(m =>
          MembershipBlockElement(
            m.originalUrl,
            m.linkText,
            m.linkPrefix,
            m.title,
            m.venue,
            m.location,
            m.identifier,
            m.image,
            m.price,
          ),
        )
        .toList

    case Comment =>
      // A comment is only emitted when its html is present; body, avatar and date are parsed from it.
      (for {
        c <- element.commentTypeData
        html <- c.html
      } yield {
        CommentBlockElement(
          body = CommentCleaner.getBody(html),
          avatarURL = CommentCleaner.getAvatar(html),
          dateTime = CommentCleaner.getDateTime(html),
          permalink = c.originalUrl.getOrElse(""),
          profileURL = c.authorUrl.getOrElse(""),
          profileName = c.authorName.getOrElse(""),
        )
      }).toList

    case Embed => embedToPageElement(element, campaigns, calloutsUrl).toList
    // This process returns either:
    // 1. SoundcloudBlockElement
    // 2. EmbedBlockElement
    // 3. CalloutBlockElement

    case Callout =>
      element.calloutTypeData.flatMap { callout =>
        CalloutExtraction.extractCalloutByCampaignId(
          callout,
          campaigns,
          calloutsUrl,
        )
      }.toList

    case Contentatom =>
      (extractAtom match {

        case Some(audio: AudioAtom) => {
          Some(
            AudioAtomBlockElement(
              id = audio.id,
              kicker = audio.data.kicker,
              title = audio.atom.title,
              coverUrl = audio.data.coverUrl,
              trackUrl = audio.data.trackUrl,
              duration = audio.data.duration,
              contentId = audio.data.contentId,
            ),
          )
        }

        case Some(chart: ChartAtom) => {
          // chart.id is a uuid, so there is no real need to url-encode it but just to be safe
          val encodedId = URLEncoder.encode(chart.id, "UTF-8")
          Some(
            ChartAtomBlockElement(
              id = chart.id,
              url = s"${Configuration.ajax.url}/embed/atom/chart/$encodedId",
              html = chart.html, // This is atom.defaultHtml
              css = None, // hardcoded to None during experimental period
              js = None, // hardcoded to None during experimental period
              title = chart.title,
            ),
          )
        }

        case Some(explainer: ExplainerAtom) => {
          Some(ExplainerAtomBlockElement(explainer.id, explainer.title, explainer.body))
        }

        case Some(guide: GuideAtom) => {
          // Each item contributes an optional bold title paragraph followed by its body.
          val html = guide.data.items
            .map(item => s"${item.title.map(t => s"<p><strong>${t}</strong></p>").getOrElse("")}${item.body}")
            .mkString("")
          Some(
            GuideAtomBlockElement(
              id = guide.id,
              label = guide.data.typeLabel.getOrElse("Quick Guide"),
              title = guide.atom.title.getOrElse(""),
              img = guide.image.flatMap(ImgSrc.getAmpImageUrl),
              html = html,
              credit = guide.credit.getOrElse(""),
            ),
          )
        }

        case Some(interactive: InteractiveAtom) => {
          // Legacy means the switch-over date is after the publication date.
          val isLegacy =
            InteractiveSwitchOver.date.isAfter(Chronos.jodaDateTimeToJavaTimeDateTime(webPublicationDate))
          val encodedId = URLEncoder.encode(interactive.id, "UTF-8")
          Some(
            InteractiveAtomBlockElement(
              id = interactive.id,
              url = s"${Configuration.ajax.url}/embed/atom/interactive/$encodedId",
              // Note, we parse legacy interactives to do minimal cleaning of
              // the HTML (e.g. to ensure all tags are closed). Some break
              // without this. E.g.
              // https://www.theguardian.com/info/ng-interactive/2021/mar/17/make-sense-of-the-week-with-australia-weekend.
              html =
                if (isLegacy) Some(Jsoup.parseBodyFragment(interactive.html).outerHtml)
                else Some(interactive.html),
              css = Some(interactive.css),
              js = interactive.mainJS,
              placeholderUrl = interactive.placeholderUrl,
              role = elementRole,
              title = interactive.title,
            ),
          )
        }

        case Some(mediaAtom: MediaAtom) => {
          val imageOverride = overrideImage.map(_.images).flatMap(Video700.bestSrcFor)
          val altText = overrideImage.flatMap(_.images.allImages.headOption.flatMap(_.altText))
          mediaAtom match {
            // YouTube is chosen when the first active asset is on the YouTube platform.
            case _ if mediaAtom.activeAssets.headOption.exists(_.platform == MediaAssetPlatform.Youtube) => {
              mediaAtom.activeAssets.headOption.map(asset => {
                YoutubeBlockElement(
                  id = mediaAtom.id, // CAPI ID
                  assetId = asset.id, // Youtube ID
                  channelId = mediaAtom.channelId, // Channel ID
                  mediaTitle = mediaAtom.title, // Caption
                  overrideImage = if (isMainBlock) imageOverride else None,
                  posterImage = mediaAtom.posterImage.map(NSImage1.imageMediaToSequence),
                  expired = mediaAtom.expired.getOrElse(false),
                  duration = mediaAtom.duration, // Duration in seconds
                  altText = if (isMainBlock) altText else None,
                )
              })
            }
            case _ =>
              Some(
                MediaAtomBlockElement(
                  mediaAtom.id,
                  mediaAtom.title,
                  mediaAtom.defaultHtml,
                  mediaAtom.assets.map(MediaAtomBlockElementMediaAsset.fromMediaAsset),
                  mediaAtom.duration,
                  mediaAtom.posterImage.map(NSImage1.imageMediaToSequence),
                  mediaAtom.expired,
                  mediaAtom.activeVersion,
                  mediaAtom.channelId,
                ),
              )
          }
        }

        case Some(profile: ProfileAtom) => {
          // Each item contributes an optional bold title paragraph followed by its body.
          val html = profile.data.items
            .map(item => s"${item.title.map(t => s"<p><strong>${t}</strong></p>").getOrElse("")}${item.body}")
            .mkString("")
          val items = profile.data.items.toList.map(item => ProfileAtomBlockElementItem(item.title, item.body))
          Some(
            ProfileAtomBlockElement(
              id = profile.id,
              label = profile.data.typeLabel.getOrElse("Profile"),
              title = profile.atom.title.getOrElse(""),
              img = profile.image.flatMap(ImgSrc.getAmpImageUrl),
              html = html,
              items = items,
              credit = profile.credit.getOrElse(""),
            ),
          )
        }

        case Some(qa: QandaAtom) => {
          Some(
            QABlockElement(
              id = qa.id,
              title = qa.atom.title.getOrElse(""),
              img = qa.image.flatMap(ImgSrc.getAmpImageUrl),
              html = qa.data.item.body,
              credit = qa.credit.getOrElse(""),
            ),
          )
        }

        case Some(timeline: TimelineAtom) => {
          Some(
            TimelineAtomBlockElement(
              id = timeline.id,
              title = timeline.atom.title.getOrElse(""),
              description = timeline.data.description,
              events = timeline.data.events
                .map(event =>
                  TimelineAtomEvent(
                    title = event.title,
                    date = TimelineAtom.renderFormattedDate(event.date, event.dateFormat),
                    body = event.body,
                    toDate = event.toDate.map(date => TimelineAtom.renderFormattedDate(date, event.dateFormat)),
                    unixDate = event.date,
                    toUnixDate = event.toDate,
                  ),
                )
                .toSeq,
            ),
          )
        }

        case Some(quizAtom: QuizAtom) => {
          val questions = quizAtom.content.questions.map { q =>
            QuizAtomQuestion(
              id = q.id,
              text = q.text,
              answers = q.answers.map(a =>
                QuizAtomAnswer(
                  id = a.id,
                  text = a.text,
                  revealText = a.revealText,
                  answerBuckets = a.buckets,
                  isCorrect = a.weight == 1,
                ),
              ),
              imageUrl = q.imageMedia.flatMap(i => ImgSrc.getAmpImageUrl(i.imageMedia)),
              imageAlt = q.imageMedia
                .flatMap(i => i.imageMedia.masterImage.flatMap(_.altText))
                // Remove surrounding quotes from alt text, e.g
                // "hello world" => hello world
                .map(_.replaceAll("^\"|\"$", "")),
            )
          }
          Some(
            QuizAtomBlockElement(
              id = quizAtom.id,
              quizType = quizAtom.quizType,
              questions = questions,
              resultBuckets = quizAtom.content.resultBuckets.map { bucket =>
                QuizAtomResultBucket(bucket.id, bucket.title, bucket.description)
              },
              resultGroups =
                quizAtom.content.resultGroups.map(x => QuizAtomResultGroup(x.id, x.title, x.shareText, x.minScore)),
            ),
          )
        }

        // Here we capture all the atom types which are not yet supported.
        // ContentAtomBlockElement is mapped to null in the DCR source code.
        case Some(atom) => Some(ContentAtomBlockElement(atom.id))

        case _ => None
      }).toList

    case GuMap =>
      {
        // A map is only emitted when originalUrl, source and html are present and the
        // iframe src, width and height can all be read from the html.
        for {
          mapElem <- element.mapTypeData
          originalUrl <- mapElem.originalUrl
          source <- mapElem.source
          html <- mapElem.html
          embedUrl <- getIframeSrc(html)
          width <- getIframeWidth(html)
          height <- getIframeHeight(html)
          caption = mapElem.caption.getOrElse("")
          title = mapElem.title.getOrElse("")
          thirdPartyTracking = containsThirdPartyTracking(element.tracking)
        } yield MapBlockElement(
          embedUrl,
          originalUrl,
          Some(source),
          caption,
          title,
          width,
          height,
          thirdPartyTracking,
          mapElem.sourceDomain,
        )
      }.toList

    case Pullquote =>
      element.pullquoteTypeData
        .map(d =>
          PullquoteBlockElement(
            d.html,
            Role(d.role),
            d.attribution,
            containsThirdPartyTracking(element.tracking),
            d.source,
            d.sourceDomain,
          ),
        )
        .toList

    case Interactive =>
      element.interactiveTypeData
        .map(d =>
          InteractiveBlockElement(d.iframeUrl, d.alt, d.scriptUrl.map(ensureHTTPS), d.role, d.isMandatory, d.caption),
        )
        .toList

    case Table => element.tableTypeData.map(d => TableBlockElement(d.html, Role(d.role), d.isMandatory)).toList

    case Witness => {
      // Embed types other than "image", "video" and "text" produce no element.
      (for {
        wtd <- element.witnessTypeData
        embedType <- wtd.witnessEmbedType
      } yield {
        embedType match {
          case "image" => Some(makeWitnessBlockElementImage(element, wtd))
          case "video" => Some(makeWitnessBlockElementVideo(element, wtd))
          case "text"  => Some(makeWitnessBlockElementText(element, wtd))
          case _       => None
        }
      }).toList.flatten
    }

    case Document =>
      element.documentTypeData
        .map(d =>
          DocumentBlockElement(
            embedUrl = getEmbedUrl(d.html),
            height = d.height,
            width = d.width,
            title = d.title,
            isMandatory = d.isMandatory,
            isThirdPartyTracking = containsThirdPartyTracking(element.tracking),
            source = d.source,
            sourceDomain = d.sourceDomain,
          ),
        )
        .toList

    case Instagram =>
      element.instagramTypeData
        .map(d =>
          InstagramBlockElement(
            d.originalUrl,
            d.html,
            d.caption.isDefined,
            containsThirdPartyTracking(element.tracking),
            Some(d.source),
            d.sourceDomain,
          ),
        )
        .toList

    case Vine =>
      // A vine is only emitted when its html is present and contains an iframe src;
      // a missing iframe height or width falls back to 0.
      (for {
        fields <- element.vineTypeData
        html <- fields.html
        iframeSrc <- getIframeSrc(html)
      } yield {
        VineBlockElement(
          iframeSrc,
          getIframeHeight(html).getOrElse(0),
          getIframeWidth(html).getOrElse(0),
          fields.originalUrl,
          fields.title,
          containsThirdPartyTracking(element.tracking),
          Some(fields.source),
          fields.sourceDomain,
          fields.role,
        )
      }).toList

    case Code => {
      (for {
        data <- element.codeTypeData
      } yield {
        CodeBlockElement(data.html, data.language, false)
      }).toList
    }

    case Form => List(FormBlockElement(None))

    case GuList =>
      element.listTypeData.map { listTypeData =>
        ListBlockElement(
          items = listTypeData.items.map { item =>
            makeListItem(
              addAffiliateLinks,
              pageUrl,
              atoms,
              isImmersive,
              campaigns,
              calloutsUrl,
              edition,
              webPublicationDate,
              item,
            )
          }.toSeq,
          listElementType = listTypeData.`type`.map(_.name),
        )
      }.toList

    case Timeline =>
      element.timelineTypeData.map { timelineTypeData =>
        TimelineBlockElement(
          sections = makeTimelineSection(
            addAffiliateLinks,
            pageUrl,
            atoms,
            isImmersive,
            campaigns,
            calloutsUrl,
            edition,
            webPublicationDate,
            timelineTypeData,
          ),
        )
      }.toList

    case EnumUnknownElementType(_) => List(UnknownBlockElement(None))

    case _ => Nil
  }
}