backend/app/controllers/api/PagesController.scala (120 lines of code) (raw):

package controllers.api import commands.{GetPagePreview, GetResource, ResourceFetchMode} import model.frontend.{Chips, HighlightableText, TextHighlight} import model.index.{FrontendPage, HighlightForSearchNavigation, PageHighlight} import model.{Language, Languages, Uri} import org.apache.pdfbox.pdmodel.PDDocument import play.api.libs.json.Json import play.api.mvc.{ResponseHeader, Result} import services.ObjectStorage import services.annotations.Annotations import services.index.{Index, Pages2} import services.manifest.Manifest import services.previewing.PreviewService import utils.PDFUtil import utils.attempt.Attempt import utils.controller.{AuthApiController, AuthControllerComponents} class PagesController(val controllerComponents: AuthControllerComponents, manifest: Manifest, index: Index, pagesService: Pages2, annotations: Annotations, previewStorage: ObjectStorage) extends AuthApiController { def getPageCount(uri: Uri) = ApiAction.attempt { req => pagesService.getPageCount(uri).map(count => Ok(Json.obj("pageCount" -> count))) } // Get language and highlight data for a given page // This expects searchQuery to have already been run through Chips.parseQueryString private def frontendPageFromQuery(uri: Uri, pageNumber: Int, username: String, searchQuery: Option[String], findQuery: Option[String]): Attempt[FrontendPage] = { val getResource = GetResource(uri, ResourceFetchMode.Basic, username, manifest, index, annotations, controllerComponents.users).process() val getPage = pagesService.getPageGeometries(uri, pageNumber, searchQuery, findQuery) for { // Check we have permission to see this file _ <- getResource page <- getPage allLanguages = page.value.keySet // Highlighting stuff searchHighlights = dedupeHighlightSpans(page.page, page.value, false) findHighlights = page.highlightedText.map { langMap => dedupeHighlightSpans(page.page, langMap, true) }.getOrElse(Map.empty) highlights <- getHighlightGeometriesForPage(uri, pageNumber, searchHighlights, findHighlights) } yield { FrontendPage(pageNumber, allLanguages.head, allLanguages, page.dimensions, highlights.flatMap(_.highlights).toList) } } // Get language and highlight data for a given page def getPageData(uri: Uri, pageNumber: Int, sq: Option[String], fq: Option[String]) = ApiAction.attempt { req => for { response <- frontendPageFromQuery(uri, pageNumber, req.user.username, sq.map(Chips.parseQueryString(_).query), fq) } yield { Ok(Json.toJson(response)) } } case class HighlightGeometries(lang: Language, highlights: List[PageHighlight]) private def getHighlightGeometriesForPage(uri: Uri, pageNumber: Int, highlights: Map[Language, List[TextHighlight]], findHighlights: Map[Language, List[TextHighlight]]) = { val previewPaths = (highlights.keySet ++ findHighlights.keySet).map { lang => lang -> PreviewService.getPageStoragePath(uri, lang, pageNumber) } Attempt.sequence(previewPaths.map { case (lang, path) => previewStorage.get(path).toAttempt.map { pdfData => try { val pdf = PDDocument.load(pdfData) try { val highlightSpans = highlights.getOrElse(lang, Nil) val findHighlightSpans = findHighlights.getOrElse(lang, Nil) val highlightGeometries = PDFUtil.getSearchResultHighlights(highlightSpans, pdf, pageNumber, false) val findHighlightGeometries = PDFUtil.getSearchResultHighlights(findHighlightSpans, pdf, pageNumber, true) HighlightGeometries(lang, highlightGeometries ++ findHighlightGeometries) } finally { pdf.close() } } finally { pdfData.close() } } }) } // This is pretty ugly, probably super inefficient too... // It basically pulls out the individual highlight spans for each language and then deduplicates ones that appear // in multiple langauges. This allows us to avoid calculating highlight geometry for multiple languages when the // highlight is the same. // This is good because it allows us to minimise the number of downloads from S3 in the common case. private def dedupeHighlightSpans(page: Long, valueMap: Map[Language, String], isFind: Boolean): Map[Language, List[TextHighlight]] = { valueMap.toList.flatMap { case (lang, text) => val hlText = HighlightableText.fromString(text, Some(page), isFind) hlText.highlights.map(span => (lang, span)) }.groupBy(_._2).toList.map { case (lang, commonSpans) => commonSpans.head }.groupBy(_._1).view.mapValues(_.map(_._2)).filter { case (k, v) => v.nonEmpty }.toMap } def getPagePreview(uri: Uri, pageNumber: Int) = ApiAction.attempt { req => val getResource = GetResource(uri, ResourceFetchMode.Basic, req.user.username, manifest, index, annotations, controllerComponents.users).process() val getPagePreview = new GetPagePreview(uri, Languages.getByKeyOrThrow("english"), pageNumber, previewStorage).process() for { // Check we have permission to see this file _ <- getResource response <- getPagePreview } yield { Result(ResponseHeader(200, Map.empty), response) } } private def getHighlights(uri: Uri, query: String, username: String, isSearch: Boolean): Attempt[Seq[HighlightForSearchNavigation]] = { val searchQuery = if (isSearch) Some(query) else None val findQuery = if (isSearch) None else Some(query) for { pagesWithHits <- pagesService.findInPages(uri, query) pageData <- Attempt.sequence( pagesWithHits.map(frontendPageFromQuery(uri, _, username, searchQuery, findQuery)) ) } yield { val highlights = for { page <- pageData highlight <- page.highlights } yield { HighlightForSearchNavigation.fromPageHighlight(page.page, highlight.index, highlight) } highlights.sortBy(h => (h.pageNumber, h.highlightNumber)) } } // This endpoint is used to get highlights for "find in document" on-demand queries. def findInDocument(uri: Uri, q: String) = ApiAction.attempt { req => getHighlights(uri, q, req.user.username, isSearch = false).map(highlights => Ok(Json.toJson(highlights)) ) } // This endpoint is used to get highlights for the "search across documents" query which // should be fixed for the lifetime of the page viewer of a given document. // It behaves identically to the findInDocument endpoint, except that it expects its query to be in // a JSON format that may contain chips, and it returns highlight ids with a different prefix. def searchInDocument(uri: Uri, q: String) = ApiAction.attempt { req => getHighlights(uri, Chips.parseQueryString(q).query, req.user.username, isSearch = true).map(highlights => Ok(Json.toJson(highlights)) ) } }