app/com/gu/itunes/iTunesRssItem.scala (284 lines of code) (raw):
package com.gu.itunes
import org.joda.time._
import com.gu.contentapi.client.model.v1._
import org.apache.commons.codec.digest.DigestUtils.md5Hex
import java.net.URI
import scala.xml.Node
class iTunesRssItem(val podcast: Content, val tagId: String, asset: Asset, adFree: Boolean = false, podcastType: Option[String] = None, imageResizerSignatureSalt: Option[String]) {
private val trailText = podcast.fields.flatMap(_.trailText)
private val standfirstOrTrail = podcast.fields.flatMap(_.standfirst) orElse trailText
private def isValidForEpisodicArtwork(podcast: Content): Boolean = {
(tagId == "lifeandstyle/series/comforteatingwithgracedent" &&
podcast.webPublicationDate.exists(wpd => new DateTime(wpd.dateTime).getMillis >= new DateTime(2024, 6, 11, 0, 0).getMillis)) ||
(tagId == "australia-news/series/full-story" &&
podcast.webPublicationDate.exists(wpd => new DateTime(wpd.dateTime).getMillis >= new DateTime(2024, 10, 8, 0, 0).getMillis)) ||
(tagId == "news/series/guardian-australia-podcast-series" &&
podcast.webPublicationDate.exists(wpd => new DateTime(wpd.dateTime).getMillis >= new DateTime(2022, 10, 1, 0, 0).getMillis)) ||
(tagId == "news/series/todayinfocus" &&
podcast.webPublicationDate.exists(wpd => new DateTime(wpd.dateTime).getMillis >= new DateTime(2025, 3, 13, 0, 0).getMillis))
}
def toXml: Node = {
// TODO: remove the below when suffix is added only where it is needed, and not by journalists
val suffix = """(.*) [-–—|] podcast$""".r
val title = podcast.webTitle match { case suffix(prefix) => prefix; case otherwise => otherwise }
val episodePattern = """[Ee]pisode\s+([0-9]+)""".r.unanchored
val episodeNumber = for {
typ <- podcastType
if typ.toLowerCase == "serial"
episodeIndicator <- episodePattern.findFirstMatchIn(podcast.webTitle)
episodeNumber <- Option(episodeIndicator.group(1))
} yield episodeNumber
val lastModified = podcast.webPublicationDate.map(date => new DateTime(date.dateTime)).getOrElse(DateTime.now)
val pubDate = DateSupport.toRssTimeFormat(lastModified)
val membershipCta = {
val theMomentFrom = new DateTime(2019, 2, 20, 0, 0)
val launchDayTIF = new DateTime(2018, 11, 14, 0, 0)
val launchDayPW = new DateTime(2016, 12, 6, 0, 0)
val launchDayPWNew = new DateTime(2018, 11, 15, 0, 0)
val footballWeekly = new DateTime(2019, 4, 4, 0, 0)
if (!adFree) {
if (tagId == "politics/series/politicsweekly") {
if (lastModified.isAfter(theMomentFrom))
""". Help support our independent journalism at <a href="https://www.theguardian.com/politicspod">theguardian.com/politicspod</a>"""
else if (lastModified.isAfter(launchDayPWNew))
""". To support The Guardian’s independent journalism, visit <a href="https://www.theguardian.com/give/podcast">theguardian.com/give/podcast</a>"""
else if (lastModified.isAfter(launchDayPW))
""". Please support our work and help us keep the world informed. To fund us, go to https://www.theguardian.com/give/podcast"""
else
""
} else if (tagId == "news/series/todayinfocus") {
if (lastModified.isAfter(theMomentFrom))
""". Help support our independent journalism at <a href="https://www.theguardian.com/infocus">theguardian.com/infocus</a>"""
else if (lastModified.isAfter(launchDayTIF))
""". To support The Guardian’s independent journalism, visit <a href="https://www.theguardian.com/todayinfocus/support">theguardian.com/todayinfocus/support</a>"""
else
""
} else if (tagId == "books/series/books") {
if (lastModified.isAfter(theMomentFrom))
""". Help support our independent journalism at <a href="https://www.theguardian.com/bookspod">theguardian.com/bookspod</a>"""
else
""
} else if (tagId == "news/series/the-audio-long-read") {
if (lastModified.isAfter(theMomentFrom))
""". Help support our independent journalism at <a href="https://www.theguardian.com/longreadpod">theguardian.com/longreadpod</a>"""
else
""
} else if (tagId == "science/series/science") {
if (lastModified.isAfter(theMomentFrom))
""". Help support our independent journalism at <a href="https://www.theguardian.com/sciencepod">theguardian.com/sciencepod</a>"""
else
""
} else if (tagId == "technology/series/chips-with-everything") {
if (lastModified.isAfter(theMomentFrom))
""". Help support our independent journalism at <a href="https://www.theguardian.com/chipspod">theguardian.com/chipspod</a>"""
else
""
} else if (tagId == "football/series/footballweekly") {
if (lastModified.isAfter(footballWeekly))
""". Help support our independent journalism at <a href="https://www.theguardian.com/footballweeklypod">theguardian.com/footballweeklypod</a>"""
else ""
} else {
""
}
} else {
""
}
}
def acastProxy(url: String): String = {
case class AcastLaunchGroup(launchDate: DateTime, tagIds: Seq[String])
val acastPodcasts: Seq[AcastLaunchGroup] = Seq(
AcastLaunchGroup(new DateTime(2017, 4, 19, 0, 0), Seq("technology/series/chips-with-everything")),
AcastLaunchGroup(new DateTime(2017, 5, 2, 0, 0), Seq(
"football/series/footballweekly",
"news/series/the-audio-long-read",
"science/series/science",
"politics/series/politicsweekly",
"arts/series/culture",
"books/series/books",
"technology/series/chips-with-everything",
"society/series/token")),
AcastLaunchGroup(new DateTime(2017, 6, 12, 0, 0), Seq(
"politics/series/brexit-means",
"global-development/series/global-development-podcast",
"news/series/the-story",
"lifeandstyle/series/close-encounters",
"music/series/musicweekly",
"lifeandstyle/series/guardian-guide-to-running-podcast-beginner",
"commentisfree/series/what-would-a-feminist-do",
"tv-and-radio/series/game-of-thrones-the-citadel-podcast",
"australia-news/series/australian-politics-live",
"australia-news/series/behind-the-lines-podcast",
"artanddesign/series/guardian-australia-culture-podcast",
"film/series/the-dailies-podcast",
"world/series/project",
"us-news/series/politics-for-humans")),
AcastLaunchGroup(new DateTime(2018, 2, 21, 0, 0), Seq(
"culture/series/thestart")),
AcastLaunchGroup(new DateTime(2018, 5, 30, 0, 0), Seq(
"australia-news/series/the-reckoning-guardian-australia-podcast")),
AcastLaunchGroup(new DateTime(2018, 5, 29, 0, 0), Seq(
"membership/series/we-need-to-talk-about")),
AcastLaunchGroup(new DateTime(2018, 9, 13, 0, 0), Seq(
"society/series/beyondtheblade")),
AcastLaunchGroup(new DateTime(2018, 10, 25, 0, 0), Seq(
"news/series/todayinfocus")),
AcastLaunchGroup(new DateTime(2018, 11, 24, 0, 0), Seq(
"australia-news/series/witch-hunt")),
AcastLaunchGroup(new DateTime(2019, 1, 28, 0, 0), Seq(
"environment/series/look-at-me")),
AcastLaunchGroup(new DateTime(2019, 5, 23, 0, 0), Seq(
"sport/series/the-spin-podcast")),
AcastLaunchGroup(new DateTime(2019, 10, 7, 0, 0), Seq(
"australia-news/series/full-story")),
AcastLaunchGroup(new DateTime(2020, 1, 28, 0, 0), Seq(
"science/series/thegenegapcommonthreads")),
AcastLaunchGroup(new DateTime(2020, 5, 7, 0, 0), Seq(
"football/series/forgotten-stories-of-football")),
AcastLaunchGroup(new DateTime(2020, 6, 18, 0, 0), Seq(
"society/series/innermost")),
AcastLaunchGroup(new DateTime(2020, 11, 25, 0, 0), Seq(
"australia-news/series/temporary")),
AcastLaunchGroup(new DateTime(2021, 1, 19, 0, 0), Seq(
"music/series/reverberate")),
AcastLaunchGroup(new DateTime(2021, 6, 8, 0, 0), Seq(
"lifeandstyle/series/comforteatingwithgracedent")),
AcastLaunchGroup(new DateTime(2021, 9, 1, 0, 0), Seq(
"australia-news/series/australia-reads")),
AcastLaunchGroup(new DateTime(2021, 10, 5, 0, 0), Seq(
"culture/series/saved-for-later")),
AcastLaunchGroup(new DateTime(2021, 12, 2, 0, 0), Seq(
"culture/series/book-it-in")),
AcastLaunchGroup(new DateTime(2021, 12, 8, 0, 0), Seq(
"sport/series/the-final-word-ashes-podcast")),
AcastLaunchGroup(new DateTime(2022, 1, 31, 0, 0), Seq(
"lifeandstyle/series/weekend")),
AcastLaunchGroup(new DateTime(2022, 2, 18, 0, 0), Seq(
"politics/series/politics-weekly-america")),
AcastLaunchGroup(new DateTime(2022, 6, 28, 0, 0), Seq(
"football/series/theguardianswomensfootballweekly")),
AcastLaunchGroup(new DateTime(2022, 9, 6, 0, 0), Seq(
"society/series/canitellyouasecret")),
AcastLaunchGroup(new DateTime(2022, 10, 18, 0, 0), Seq(
"news/series/an-impossible-choice")),
AcastLaunchGroup(new DateTime(2022, 10, 19, 0, 0), Seq(
"society/series/pop-culture-with-chante-joseph")),
AcastLaunchGroup(new DateTime(2022, 10, 26, 0, 0), Seq(
"news/series/ben-roberts-smith-v-the-media")),
AcastLaunchGroup(new DateTime(2022, 10, 28, 0, 0), Seq(
"news/series/australia-v-the-climate")),
AcastLaunchGroup(new DateTime(2023, 3, 28, 0, 0), Seq(
"news/series/cotton-capital-podcast")),
AcastLaunchGroup(new DateTime(2024, 2, 15, 0, 0), Seq(
"technology/series/blackbox")),
AcastLaunchGroup(new DateTime(2024, 4, 11, 0, 0), Seq(
"australia-news/series/who-screwed-millennials")),
// Yes, the launch date for the guardian-australia-podcast-series is correct. This is a new series tag for
// pre-existing episodes that have been re-invigorated by the addition of episodic artwork, but it won't be
// re-published. The oldest piece is expected to be from October 2022.
AcastLaunchGroup(new DateTime(2022, 10, 1, 0, 0), Seq(
"news/series/guardian-australia-podcast-series")))
val useAcastProxy = !adFree && acastPodcasts.find(_.tagIds.contains(tagId)).exists(p => lastModified.isAfter(p.launchDate))
if (useAcastProxy) "https://flex.acast.com/" + url.replace("https://", "") else url
}
// enabling html in descriptions is a bit of an unknown so we'll restrict the potential for upset
// by limiting the effect to just the TiF series generally and Politics Weekly America from April 4th 2025
// for the moment. We can extend or remove this as we (or editorial) like - assuming it doesn't break
// any of the platforms along the way, obvs.
val shouldPreserveHtmlInDescription =
tagId == "news/series/todayinfocus" ||
(tagId == "politics/series/politics-weekly-america" && podcast.webPublicationDate.exists(wpd => new DateTime(wpd.dateTime).getMillis >= new DateTime(2025, 4, 4, 0, 0).getMillis))
val description = Filtering.standfirst(standfirstOrTrail.getOrElse(""), preserveHtml = shouldPreserveHtmlInDescription) + membershipCta
val url = acastProxy(asset.file.getOrElse(""))
val sizeInBytes = asset.typeData.flatMap(_.sizeInBytes).getOrElse(0).toString
val mType = asset.mimeType.getOrElse("")
/* Old content served from http(s)://static(-secure).guim.co.uk/{...} will have the guid field set
to http://download.guardian.co.uk/{...} for legacy reasons (to match the R2 implementation);
new content served from https://audio.guim.co.uk will preserve its structure. */
val capiUrl = asset.file.getOrElse("")
val regex = s"""https?://static(-secure)?.guim.co.uk/audio/kip/$tagId"""
val guid = {
val composerBasedGuidsLaunchDate = new DateTime(2018, 11, 14, 0, 0)
val default = (capiUrl.replaceAll(regex, "http://download.guardian.co.uk/draft/audio"), true)
if (lastModified.isAfter(composerBasedGuidsLaunchDate)) {
// We use the internal composer code as the guid from this time on
// These are not perma links.
podcast.fields.flatMap(_.internalComposerCode.map((_, false))).getOrElse(default)
} else
default
}
val duration = {
val seconds = asset.typeData.flatMap(_.durationSeconds)
val minutes = asset.typeData.flatMap(_.durationMinutes)
convertDate(seconds, minutes)
}
val explicit = {
val exp = asset.typeData.flatMap(_.explicit).getOrElse(false)
val cln = asset.typeData.flatMap(_.clean).getOrElse(false)
if (exp) Some("yes") else if (cln) Some("clean") else None
}
val keywords = makeKeywordsList(podcast.tags.toSeq)
val subtitle = Filtering.standfirst(trailText.getOrElse(""), preserveHtml = false)
val summary = Filtering.standfirst(standfirstOrTrail.getOrElse(""), preserveHtml = false) + membershipCta
val episodeImage: Option[String] = imageResizerSignatureSalt.filter(_.nonEmpty && isValidForEpisodicArtwork(podcast)).flatMap { salt =>
val maybeThumbnailImageElements = podcast.elements.find(_.exists(el => el.relation == "thumbnail" && el.`type` == ElementType.Image))
.getOrElse(Seq.empty)
val assets = maybeThumbnailImageElements.flatMap { el =>
el.assets
.filterNot(_.typeData.flatMap(_.isMaster).getOrElse(false))
.filter(_.typeData.flatMap(_.width).isDefined)
.sortBy(_.typeData.map(_.width))
.reverse
}
val maxDim = 3000 // we're going for square crops, so width will always == height anyway
val quality = 75 // reads like a compression limiter (rather than dpi)
val fit = "crop" // automatically crop from the centre of the original
assets.headOption.flatMap { asset =>
asset.file.map { filePath =>
val uri = new URI(filePath)
val scheme = uri.getScheme
val imgType = uri.getHost.split("\\.").headOption.map(name => s"/$name").getOrElse("") // eg. media.guim.go.uk becomes /media
val resizeString = s"width=$maxDim&height=$maxDim&quality=$quality&fit=$fit"
val pathWithResizeString = s"${uri.getRawPath}?$resizeString"
val separator = if (pathWithResizeString.contains("?")) "&" else "?"
val signedPath = s"$pathWithResizeString${separator}s=${md5Hex(s"$salt$pathWithResizeString")}"
val imageUri = s"$scheme://i.guim.co.uk/img$imgType$signedPath"
imageUri
}
}
}
<item>
<title>{ title }</title>
<itunes:title>{ title }</itunes:title>
<description>{ description }</description>
<enclosure url={ url } length={ sizeInBytes } type={ mType }/>
<pubDate>{ pubDate }</pubDate>
<guid isPermaLink={ guid._2.toString }>{ guid._1 }</guid>
<itunes:duration>{ duration }</itunes:duration>
<itunes:author>{ iTunesRssFeed.author }</itunes:author>
{
episodeImage match {
case Some(image) => <itunes:image href={ image }/>
case None =>
}
}
{
explicit match {
case Some(value) => <itunes:explicit>{ value }</itunes:explicit>
case None =>
}
}
<itunes:keywords>{ keywords }</itunes:keywords>
{
if (!adFree) {
<itunes:subtitle>{ subtitle }</itunes:subtitle>
}
}
{
episodeNumber match {
case Some(num) => <itunes:episode>{ num }</itunes:episode>
case None =>
}
}
<itunes:summary>{ scala.xml.Utility.escape(summary) }</itunes:summary>
</item>
}
// convert seconds[Int] and minutes[Int] into HH:MM:SS[String]
private def convertDate(seconds: Option[Int], minutes: Option[Int]): String = {
val totalSec = minutes.getOrElse(0) * 60 + seconds.getOrElse(0)
val hrs = totalSec / 3600
val rst = totalSec % 3600
val min = rst / 60
val sec = rst % 60
s"${if (hrs < 10) "0" + hrs else hrs}:${if (min <= 9) "0" + min else min}:${if (sec <= 9) "0" + sec else sec}"
}
private def makeKeywordsList(tags: Seq[Tag]): String = {
val keys = for (t <- tags) yield t.webTitle
keys.mkString(", ")
}
}