app/services/Capi.scala (253 lines of code) (raw):

package services

import java.io.IOException
import java.net.URI
import java.nio.charset.Charset
import java.util.concurrent.TimeUnit

import com.amazonaws.auth.profile.ProfileCredentialsProvider
import com.amazonaws.auth.{
  AWSCredentialsProviderChain,
  STSAssumeRoleSessionCredentialsProvider
}
import com.gu.contentapi.client.model._
import com.gu.contentapi.client.model.v1.{Content, SearchResponse}
import com.gu.contentapi.client.{GuardianContentClient, IAMSigner, Parameter}
import conf.ApplicationConfiguration
import logging.Logging
import logic.CapiPrefiller
import model.editions._
import okhttp3.{Call, Callback, Request, Response}
import org.apache.http.client.utils.URLEncodedUtils
import services.editions.prefills.{Prefill, PrefillParamsAdapter}

import scala.concurrent.duration.Duration
import scala.concurrent.{Await, ExecutionContext, Future, Promise}
import scala.util.Try

/** Shared constants for the CAPI queries built in this file. */
object GuardianCapiDefaults {
  // 200 is max value in CAPI
  val MaxPageSize: Int = 200
}

/** Content API client that targets the draft host taken from configuration and
  * adds IAM headers (via [[getPreviewHeaders]]) to every outgoing request.
  *
  * @param config
  *   application configuration supplying the API key, draft host, preview role
  *   and AWS region
  * @param ex
  *   execution context used when awaiting and sequencing paged responses
  */
class GuardianCapi(config: ApplicationConfiguration)(implicit
    ex: ExecutionContext
) extends GuardianContentClient(apiKey = config.contentApi.editionsKey)
    with Capi
    with Logging {

  override def targetUrl: String = config.contentApi.contentApiDraftHost

  /** Performs an asynchronous GET, adding the IAM headers produced by
    * [[getPreviewHeaders]] to the request.
    *
    * The returned future fails if the call itself fails, and also if reading
    * the response body throws; previously an exception while reading the body
    * escaped the callback and left the future uncompleted forever.
    *
    * @param url
    *   absolute URL to fetch
    * @param headers
    *   headers passed to the signer before the request is built
    * @return
    *   the response body bytes together with status code and status message
    */
  override def get(url: String, headers: Map[String, String])(implicit
      context: ExecutionContext
  ): Future[HttpResponse] = {
    val signedRequestBuilder =
      getPreviewHeaders(headers, url).foldLeft(new Request.Builder().url(url)) {
        case (builder, (headerName, headerValue)) =>
          builder.addHeader(headerName, headerValue)
      }

    val promise = Promise[HttpResponse]()

    http
      .newCall(signedRequestBuilder.build())
      .enqueue(new Callback() {
        override def onFailure(call: Call, e: IOException): Unit =
          promise.failure(e)

        override def onResponse(call: Call, response: Response): Unit =
          // Wrap in Try so that a failure while reading the body completes the
          // promise with that failure instead of leaving it pending.
          promise.complete(
            Try(
              HttpResponse(
                response.body().bytes(),
                response.code(),
                response.message()
              )
            )
          )
      })

    promise.future
  }

  // Signer built from the "capi" profile credentials, falling back to an
  // assumed-role session for the configured preview role.
  private val previewSigner = {
    val capiPreviewCredentials = new AWSCredentialsProviderChain(
      new ProfileCredentialsProvider("capi"),
      new STSAssumeRoleSessionCredentialsProvider.Builder(
        config.contentApi.previewRole,
        "capi"
      ).build()
    )

    new IAMSigner(
      credentialsProvider = capiPreviewCredentials,
      awsRegion = config.aws.region
    )
  }

  /** Returns the headers produced by the IAM signer for the given headers and
    * URL.
    */
  def getPreviewHeaders(
      headers: Map[String, String],
      url: String
  ): Seq[(String, String)] =
    previewSigner.addIAMHeaders(headers = headers, URI.create(url)).toSeq

  // Sadly there's no easy way of converting a CAPI client response into JSON so we'll just proxy - similar to controllers.FaciaContentApiProxy
  // this function is used for (suggest articles for collection) functionality
  /** Fetches every page of the prefill search and drops any result whose
    * internal page code is listed in `currentPageCodes`.
    *
    * Blocks the calling thread while the pages are fetched.
    *
    * @param getPrefill
    *   parameters describing the prefill query
    * @param currentPageCodes
    *   page codes to exclude from the results
    * @return
    *   one response per fetched page, each with `results` and `total` adjusted
    *   to the filtered content
    */
  def getPrefillArticles(
      getPrefill: PrefillParamsAdapter,
      currentPageCodes: List[String]
  ): List[SearchResponse] = {
    val query =
      GuardianCapi.prepareGetPrefillArticlesQuery(getPrefill, currentPageCodes)
    logger.info(
      s"getPrefillArticles, Prefill Query: $query for ${getPrefill.metadataForLogging}"
    )

    val getResponseFunction = (pageQuery: CapiQueryGenerator) =>
      this.getResponse(pageQuery)
    val allResponses =
      GuardianCapi.readAllSearchResponsePages(query, getResponseFunction)

    withResultsThatNotContainCurrentPageCodes(allResponses, currentPageCodes)
  }

  /** Removes from each response the content whose internal page code is in
    * `currentPageCodes`; content without fields or without a page code is kept.
    */
  private def withResultsThatNotContainCurrentPageCodes(
      responses: List[SearchResponse],
      currentPageCodes: List[String]
  ): List[SearchResponse] = {
    // Hoisted into a Set so the membership test is not a linear scan per item.
    val excludedPageCodes = currentPageCodes.toSet

    def isNotExcluded(content: Content): Boolean =
      (for {
        fields <- content.fields
        pageCode <- fields.internalPageCode
      } yield !excludedPageCodes.contains(pageCode.toString))
        .getOrElse(true)

    responses.map { response =>
      val filteredResults = response.results.filter(isNotExcluded)
      response.copy(
        total = filteredResults.length,
        results = filteredResults
      )
    }
  }

  /** Fetches every page of the prefill search and maps each result that has an
    * internal page code to a [[Prefill]].
    *
    * No sorting is applied here: items are returned page by page in the order
    * the search responses list them (the query is built with
    * `orderBy("newest")`). Blocks the calling thread while pages are fetched.
    *
    * @param getPrefillParams
    *   parameters describing the prefill query
    * @return
    *   the prefill items for all fetched pages
    */
  def getUnsortedPrefillArticleItems(
      getPrefillParams: PrefillParamsAdapter
  ): List[Prefill] = {
    val query: CapiQueryGenerator =
      GuardianCapi.prepareGetUnsortedPrefillArticleItemsQuery(getPrefillParams)
    logger.info(
      s"getUnsortedPrefillArticleItems, Prefill Query: $query for ${getPrefillParams.metadataForLogging}"
    )

    val getResponseFunction = (pageQuery: CapiQueryGenerator) =>
      this.getResponse(pageQuery)
    logger.info(s"query => ${query.getUrl(targetUrl)}")

    val searchResponsePages =
      GuardianCapi.readAllSearchResponsePages(query, getResponseFunction)
    searchResponsePages.flatMap(mapToPrefill)
  }

  /** Converts the results of one response page into prefill items. */
  private def mapToPrefill(response: SearchResponse): List[Prefill] =
    response.results.flatMap(prefillMetadata(_)).toList

  /** Builds a prefill item only for content that carries an internal page
    * code.
    */
  private def prefillMetadata(content: Content): Option[Prefill] =
    content.fields
      .flatMap(_.internalPageCode)
      .map(_ => CapiPrefiller.prefill(content))
}

/** Query construction and page-reading helpers used by the [[GuardianCapi]]
  * class.
  */
object GuardianCapi extends Logging {

  import scala.jdk.CollectionConverters._

  /** Builds the query used by `getUnsortedPrefillArticleItems`: the common
    * prefill query with a minimal field list and all tags shown.
    */
  def prepareGetUnsortedPrefillArticleItemsQuery(
      getPrefillParams: PrefillParamsAdapter
  ): CapiQueryGenerator = {
    val fields = List(
      "newspaperEditionDate",
      "newspaperPageNumber",
      "internalPageCode"
    )

    generatePrefillQuery(getPrefillParams, fields).showTags("all")
  }

  /** Builds the query used by `getPrefillArticles`: the common prefill query
    * with a wider field list plus image elements, all tags, the main block and
    * media atoms.
    *
    * @param currentPageCodes
    *   not used when building the query; kept for interface compatibility
    */
  def prepareGetPrefillArticlesQuery(
      getPrefill: PrefillParamsAdapter,
      currentPageCodes: List[String]
  ): CapiQueryGenerator = {
    val fields = List(
      "newspaperEditionDate",
      "newspaperPageNumber",
      "internalPageCode",
      "isLive",
      "firstPublicationDate",
      "headline",
      "trailText",
      "byline",
      "thumbnail",
      "secureThumbnail",
      "liveBloggingNow",
      "membershipAccess",
      "shortUrl"
    )

    generatePrefillQuery(getPrefill, fields)
      .showElements("images")
      .showTags("all")
      .showBlocks("main")
      .showAtoms("media")
  }

  /** Builds the base search query shared by both prefill queries.
    *
    * The `section`, `tag` and `q` parameters are read from the adapter's
    * escaped query string and applied, in that order, on top of the page size,
    * field list, date parameter, ordering and date window.
    */
  private def generatePrefillQuery(
      getPrefillParams: PrefillParamsAdapter,
      fields: List[String]
  ): CapiQueryGenerator = {
    import getPrefillParams._

    val params = URLEncodedUtils
      .parse(
        new URI(capiPrefillQuery.escapedQueryString()),
        Charset.forName("UTF-8")
      )
      .asScala

    import capiPrefillTimeParams.{capiDateQueryParam, capiQueryTimeWindow}
    import capiQueryTimeWindow.{fromDate, toDate}

    val baseQuery: CapiQueryGenerator =
      CapiQueryGenerator(capiPrefillQuery.pathType)
        .pageSize(GuardianCapiDefaults.MaxPageSize)
        .showFields(fields.mkString(","))
        .useDate(capiDateQueryParam.entryName)
        .orderBy("newest")
        .fromDate(fromDate)
        .toDate(toDate)

    val withSections: CapiQueryGenerator = params
      .filter(pair => pair.getName == "section")
      .foldLeft(baseQuery)((acc, sectionPair) =>
        acc.section(sectionPair.getValue)
      )

    val withTags: CapiQueryGenerator = params
      .filter(pair => pair.getName == "tag")
      .foldLeft(withSections)((acc, tagPair) => acc.tag(tagPair.getValue))

    // Only the first "q" parameter is honoured.
    params
      .find(pair => pair.getName == "q")
      .fold(withTags)(queryPair => withTags.q(queryPair.getValue))
  }

  /** Reads page 1 of the query, then every remaining page it reports.
    *
    * Blocks the calling thread: up to 3 seconds for the first page, then up to
    * 5 seconds for all remaining pages together. A timeout or failed request
    * surfaces as an exception thrown by `Await.result`.
    *
    * @param query
    *   the query to page through
    * @param getResponse
    *   function performing the request for a single page
    * @return
    *   the first page followed by the remaining pages in page order
    */
  private[services] def readAllSearchResponsePages(
      query: CapiQueryGenerator,
      getResponse: CapiQueryGenerator => Future[SearchResponse]
  )(implicit ex: ExecutionContext): List[SearchResponse] = {
    // Capi Scala client have functions that reads paginated responses
    // but they give inaccurate results (most of the time it gives only the first page)
    // TODO investigate that
    //
    //    val paginateFoldRes = client.paginateFold(query)(Seq(): Seq[SearchResponse]) {
    //      (response: SearchResponse, acc: Seq[SearchResponse]) => acc :+ response
    //    }
    //
    //    val response: List[SearchResponse] = Await.result(paginateFoldRes, Duration.Inf).toList

    val FirstPageReqTimeout = Duration(3, TimeUnit.SECONDS)
    val firstPageResponse =
      Await.result(getResponse(query.page(1)), FirstPageReqTimeout)

    val totalPages = firstPageResponse.pages
    val allResponsePages =
      if (totalPages <= 1)
        List(firstPageResponse)
      else
        firstPageResponse +: readRemainingPages(totalPages, query, getResponse)

    logger.info(
      s"readAllSearchResponsePages, fetched CAPI search Response pages count ${allResponsePages.size}"
    )

    allResponsePages
  }

  /** Requests pages 2 to `totalPages` and waits up to 5 seconds for all of
    * them. Returns an empty list when `totalPages` is below 2.
    */
  private def readRemainingPages(
      totalPages: Int,
      query: CapiQueryGenerator,
      getResponse: CapiQueryGenerator => Future[SearchResponse]
  )(implicit ex: ExecutionContext): List[SearchResponse] = {
    val RemainingPagesReqTimeout = Duration(5, TimeUnit.SECONDS)

    // All requests are created up front, before any of them is awaited.
    val restFutures: List[Future[SearchResponse]] =
      (2 to totalPages)
        .map(nextPageNum => getResponse(query.page(nextPageNum)))
        .toList

    Await.result(Future.sequence(restFutures), RemainingPagesReqTimeout)
  }
}

/** Search query whose path segment is derived from a [[PathType]].
  *
  * @param pathType
  *   supplies the path segment of the request
  * @param parameterHolder
  *   query parameters accumulated so far
  */
case class CapiQueryGenerator(
    pathType: PathType,
    parameterHolder: Map[String, Parameter] = Map.empty
) extends SearchQueryBase[CapiQueryGenerator] {

  /** Returns a copy of this query using `parameterMap` as its parameters. */
  def withParameters(
      parameterMap: Map[String, Parameter]
  ): CapiQueryGenerator =
    copy(pathType, parameterMap)

  override def pathSegment: String = pathType.toPathSegment
}

/** Operations exposed by the CAPI service in this file. */
trait Capi {

  /** Returns the headers to send for a request to `url`. */
  def getPreviewHeaders(
      headers: Map[String, String],
      url: String
  ): Seq[(String, String)]

  /** Returns prefill items for the given prefill parameters. */
  def getUnsortedPrefillArticleItems(
      prefillParams: PrefillParamsAdapter
  ): List[Prefill]

  /** Returns search responses for the given prefill parameters and current
    * page codes.
    */
  def getPrefillArticles(
      prefillParams: PrefillParamsAdapter,
      currentPageCodes: List[String]
  ): List[SearchResponse]
}