backend/app/services/previewing/PreviewService.scala (137 lines of code) (raw):
package services.previewing
import java.io.{ByteArrayInputStream, InputStream}
import java.nio.charset.StandardCharsets
import java.nio.file.{Files, Paths}
import enumeratum.EnumEntry.Snakecase
import enumeratum.{EnumEntry, PlayEnum}
import model.index.{Document, IndexedResource}
import model.{Email, Language, ObjectData, ObjectMetadata, Uri}
import services.index.Index
import services.{ObjectStorage, PreviewConfig}
import utils.attempt.{Attempt, NotFoundFailure, PreviewNotSupportedFailure, UnsupportedOperationFailure}
import scala.concurrent.{ExecutionContext, Future}
// TODO MRB: The calling code has a BasicResource (thanks to the GetResource check it does anyway for permissions)
// At the moment that doesn't have the MIME types so we need to do another lookup here in the index. It would
// be cleaner if we could just pass that down and avoid the extra lookup here (and getResource on the index
// will also load all the text, which could be quite a lot, into memory only to discard it)
/**
  * Operations for serving previews of indexed resources identified by a [[Uri]].
  *
  * Every operation returns an [[Attempt]], so failures (for example a resource that cannot be
  * previewed) are reported as values rather than thrown. `DefaultPreviewService` in this file
  * is the implementation returned by `PreviewService.apply`.
  */
trait PreviewService {
/** Resolves the MIME type that the preview for `uri` would be served as. */
def getPreviewType(uri: Uri): Attempt[String]
/** Makes sure a preview for `uri` is available, without handing the preview data back to the caller. */
def generatePreview(uri: Uri): Attempt[Unit]
/** Returns the preview data (content plus metadata) for `uri`. */
def getPreviewObject(uri: Uri): Attempt[ObjectData]
}
/**
  * [[PreviewService]] backed by the index and two object stores.
  *
  * Previews are looked up in `previewStorage` first. When none is stored, the original blob is
  * served directly from `blobStorage` if its MIME type can be passed through, otherwise a PDF is
  * generated (HTML emails via `htmlPreview`, documents via `libreOfficePreview`), uploaded to
  * `previewStorage` and then served from there.
  *
  * @param index              used to look up the resource (and its MIME types) for a URI
  * @param blobStorage        storage holding the original blobs
  * @param previewStorage     storage holding previously generated previews
  * @param htmlPreview        generator used for emails that carry HTML
  * @param libreOfficePreview generator used for documents LibreOffice can convert
  */
class DefaultPreviewService(index: Index, blobStorage: ObjectStorage, previewStorage: ObjectStorage,
  htmlPreview: HtmlPreviewGenerator, libreOfficePreview: LibreOfficePreviewGenerator)(implicit ec: ExecutionContext)
  extends PreviewService {

  /**
    * Resolves the MIME type the preview of `uri` will be served as.
    *
    * Fails with `PreviewNotSupportedFailure` when the resource cannot be previewed at all.
    */
  override def getPreviewType(uri: Uri): Attempt[String] = index.getResource(uri, highlightTextQuery = None).flatMap {
    case d: Document =>
      // Check and see if we already have a preview generated. This might be the case even if the blob can be
      // passed directly to the client (eg ImageOcrExtractor rendering a PDF of the image with selectable text)
      previewStorage.getMetadata(uri.toStoragePath).map(_.mimeType).toAttempt.recoverWith {
        case _: NotFoundFailure =>
          // Can we preview this type of document at all?
          PreviewService.previewStatus(d.mimeTypes) match {
            case PreviewStatus.PassThrough =>
              // The client can render it natively. Report the MIME type that actually qualifies as
              // pass-through rather than an arbitrary element of the set, which may not be renderable.
              firstPassThroughType(d.mimeTypes) match {
                case Some(mimeType) => Attempt.Right(mimeType)
                case None => Attempt.Left[String](PreviewNotSupportedFailure)
              }

            case PreviewStatus.PdfGenerated =>
              // We can convert this file to a PDF to preview
              Attempt.Right("application/pdf")

            case _ =>
              Attempt.Left[String](PreviewNotSupportedFailure)
          }
      }

    case e: Email =>
      // Emails are only previewable through their HTML body, which is rendered to a PDF
      e.html match {
        case Some(_) => Attempt.Right("application/pdf")
        case None => Attempt.Left[String](PreviewNotSupportedFailure)
      }

    case _ =>
      // Any other kind of indexed resource has no preview; fail cleanly instead of with a MatchError
      Attempt.Left[String](PreviewNotSupportedFailure)
  }

  /**
    * Ensures a preview exists for `uri`, generating one if required.
    *
    * NOTE(review): this resolves the same `ObjectData` as [[getPreviewObject]] and then discards it.
    * If that data wraps an open stream it is not closed here - confirm against `ObjectData`.
    */
  override def generatePreview(uri: Uri): Attempt[Unit] =
    getPreviewObject(uri).map(_ => ())

  /** Returns the preview data for `uri`, generating and storing a preview first if required. */
  override def getPreviewObject(uri: Uri): Attempt[ObjectData] =
    index.getResource(uri, highlightTextQuery = None).flatMap { resource =>
      getPreviewObjectGeneratingItIfRequired(resource, uri.toStoragePath)
    }

  /** Picks the first MIME type that, on its own, is classified as pass-through. */
  private def firstPassThroughType(mimeTypes: Set[String]): Option[String] =
    mimeTypes.find(mimeType => !PreviewService.requiresConversion(Set(mimeType)))

  /**
    * Resolves the preview data for a resource in three steps: stored preview, pass-through blob,
    * freshly generated preview.
    */
  private def getPreviewObjectGeneratingItIfRequired(resource: IndexedResource, storagePathInS3: String): Attempt[ObjectData] = {
    // Check and see if we already have a preview generated. This might be the case even if the blob can be
    // passed directly to the client (eg ImageOcrExtractor rendering a PDF of the image with selectable text)
    getObjectData(storagePathInS3, previewStorage).recoverWith {
      case _: NotFoundFailure =>
        // No preview, can we pass it straight back to the client?
        resource match {
          case doc: Document if !PreviewService.requiresConversion(doc.mimeTypes) =>
            // If your blob is a pass-through type, simply stream the data back to the client
            getObjectData(storagePathInS3, blobStorage)

          case _ =>
            // Generate a preview, then return the newly generated preview
            runGeneratorOnResource(resource, storagePathInS3).flatMap { _ =>
              getObjectData(storagePathInS3, previewStorage)
            }
        }
    }
  }

  /**
    * Generates a PDF preview for an email (from its HTML) or a document (through LibreOffice) and
    * stores it in `previewStorage`.
    */
  private def runGeneratorOnResource(resource: IndexedResource, storagePathInS3: String): Attempt[Unit] = resource match {
    case e: Email =>
      e.html match {
        case Some(html) =>
          val content = new ByteArrayInputStream(html.getBytes(StandardCharsets.UTF_8))
          runGeneratorOnInputStream(e.uri.toStoragePath, htmlPreview, content)

        case None =>
          Attempt.Left[Unit](NotFoundFailure("Email exists but does not have any HTML"))
      }

    case doc: Document if doc.mimeTypes.exists(libreOfficePreview.isSupported) =>
      // Support is checked before fetching the blob so that we never obtain a blob stream
      // that would not be consumed.
      blobStorage.get(storagePathInS3).toAttempt.flatMap { blobData =>
        runGeneratorOnInputStream(doc.uri.toStoragePath, libreOfficePreview, blobData)
      }

    case doc: Document =>
      Attempt.Left[Unit](UnsupportedOperationFailure(s"Libreoffice cannot convert '${doc.mimeTypes.mkString(", ")}' to a PDF"))

    case _ =>
      Attempt.Left[Unit](UnsupportedOperationFailure("You can only generate a preview for email or document resources."))
  }

  /**
    * Runs `generator` over `is`, uploads the resulting local file to `previewStorage` as
    * `application/pdf`, and removes the local file whether or not the upload succeeded.
    */
  private def runGeneratorOnInputStream(storagePathInS3: String, generator: PreviewGenerator, is: InputStream): Attempt[Unit] =
    generator.generate(is).flatMap { localPathToGeneratedPreview =>
      // Best-effort, asynchronous removal of the local file so the caller is not kept waiting.
      // A failure to delete is intentionally not propagated.
      def deleteLocalCopy(): Unit = {
        Future { Files.deleteIfExists(localPathToGeneratedPreview) }
        ()
      }

      previewStorage.create(storagePathInS3, localPathToGeneratedPreview, mimeType = Some("application/pdf")).toAttempt
        .map { _ => deleteLocalCopy() }
        .recoverWith {
          case failure =>
            // The upload failed: still clean up the local file, then surface the original failure
            deleteLocalCopy()
            Attempt.Left[Unit](failure)
        }
    }

  /** Loads an object's metadata and content from `storage` and pairs them up. */
  private def getObjectData(key: String, storage: ObjectStorage): Attempt[ObjectData] = for {
    metadata <- storage.getMetadata(key).toAttempt
    data <- storage.get(key).toAttempt
  } yield ObjectData(data, metadata)
}
/**
  * How (and whether) a resource can be previewed, as computed by `PreviewService.previewStatus`.
  *
  * Mixing in `Snakecase` makes enumeratum derive snake_case entry names from the object names
  * (presumably what is exposed in serialised form - confirm against the API consumers).
  */
sealed abstract class PreviewStatus extends EnumEntry with Snakecase
object PreviewStatus extends PlayEnum[PreviewStatus] {
// None of the MIME types can be passed through or converted to a PDF
case object Disabled extends PreviewStatus
// At least one MIME type is audio, video or in the pass-through set, so the blob is served as-is
case object PassThrough extends PreviewStatus
// No MIME type can be passed through, but LibreOffice supports at least one, so a PDF is generated
case object PdfGenerated extends PreviewStatus
// All entries, collected by enumeratum's findValues
val values = findValues
}
/**
  * Preview classification helpers, storage-path builders for per-page previews, and the factory
  * for the default [[PreviewService]].
  */
object PreviewService {
  // TODO MRB: this assumes that all clients support the same mime types
  private val passthrough = Set("application/pdf", "image/jpeg", "image/gif", "image/png")

  /**
    * Classifies a set of MIME types.
    *
    * Pass-through wins when any type is audio, video or in the pass-through set; otherwise PDF
    * generation is chosen when LibreOffice supports any of the types; otherwise previewing is disabled.
    */
  def previewStatus(mimeTypes: Set[String]): PreviewStatus = {
    val renderableByClient = mimeTypes.exists { candidate =>
      candidate.startsWith("video/") || candidate.startsWith("audio/") || passthrough.contains(candidate)
    }
    val convertibleToPdf = mimeTypes.exists(candidate => LibreOfficePreviewGenerator.isSupported(candidate))

    if (renderableByClient) PreviewStatus.PassThrough
    else if (convertibleToPdf) PreviewStatus.PdfGenerated
    else PreviewStatus.Disabled
  }

  /** True unless the MIME types are classified as pass-through. */
  def requiresConversion(mimeTypes: Set[String]): Boolean =
    previewStatus(mimeTypes) match {
      case PreviewStatus.PassThrough => false
      case _ => true
    }

  /** Storage prefix under which the per-page previews of a blob are kept for one language. */
  def getPageStoragePrefix(blobUri: Uri, language: Language): String = {
    val languageKey = language.key
    val blobPath = blobUri.toStoragePath
    s"pages/$languageKey/$blobPath"
  }

  /** Storage path of the PDF for a single page of a blob in one language. */
  def getPageStoragePath(blobUri: Uri, language: Language, pageNumber: Int): String = {
    val prefix = getPageStoragePrefix(blobUri, language)
    s"$prefix/$pageNumber.pdf"
  }

  /**
    * Builds the default service: makes sure the configured workspace directory exists, then wires
    * both preview generators to it.
    */
  def apply(preview: PreviewConfig, index: Index, blobStorage: ObjectStorage, previewStorage: ObjectStorage)(implicit ec: ExecutionContext): PreviewService = {
    val workspaceDir = Paths.get(preview.workspace)
    Files.createDirectories(workspaceDir)

    new DefaultPreviewService(
      index,
      blobStorage,
      previewStorage,
      new HtmlPreviewGenerator(preview.wkhtmltopdfBinary, workspaceDir),
      new LibreOfficePreviewGenerator(preview.libreOfficeBinary, workspaceDir)
    )
  }
}