in backend/app/extraction/email/msg/MsgEmailExtractor.scala [32:102]
/**
  * Looks up a single header in the raw header text of an Outlook message.
  *
  * The header name is matched case-insensitively at the start of a line, and
  * any folded continuation lines (lines that start with a space or a tab
  * directly after the matching line) are joined onto the value before it is
  * trimmed.
  *
  * @param message the message whose raw headers (`getHeaders`) are searched
  * @param header  the header name without the trailing colon, e.g. "Date"
  * @return the trimmed value of the first matching header, or None when the
  *         message has no headers, the header is absent, or its value is blank
  */
private def getHeaderValue(message: OutlookMessage, header: String): Option[String] = {
  val prefix = s"$header:"
  Option(message.getHeaders).flatMap { headers =>
    val lines = headers.lines.collect(Collectors.toList[String]).asScala
    // regionMatches(ignoreCase = true, ...) gives a case-insensitive "startsWith"
    val start = lines.indexWhere(_.regionMatches(true, 0, prefix, 0, prefix.length))
    if (start < 0) {
      None
    } else {
      // Unfold: continuation lines begin with whitespace and belong to the same header
      val continuation = lines.drop(start + 1).takeWhile(l => l.startsWith(" ") || l.startsWith("\t"))
      Some((lines(start).substring(prefix.length) +: continuation).mkString.trim)
    }
  }.filter(_.nonEmpty)
}
/**
  * Builds an Email from an Outlook message, ingests it, and then processes
  * its attachments.
  *
  * Nested message attachments are handled by calling this method recursively
  * with the same `blob` and `params`. File attachments are copied into a
  * scratch working directory, fingerprinted, ingested as files under the
  * email's URI, and the working directory is removed afterwards.
  *
  * @param blob   the blob the message was read from; its URI is used to build the email's ingestion context
  * @param msg    the parsed Outlook message
  * @param params extraction parameters passed through to the ingestion context builders
  */
private def processMessage(blob: Blob, msg: OutlookMessage, params: ExtractionParams): Unit = {
  val uri = msg.getMessageId.hasTextOrNone().map(id => Uri(Email.cleanUri(id)))
  val from = Option(msg.getFromEmail).map(e => Recipient(Option(msg.getFromName), e))
  val sentAt = getHeaderValue(msg, "Date").flatMap(DateTimeUtils.rfc1123ToIsoDateString)
  val subject = Option(msg.getSubject).getOrElse("")
  val priority: Option[String] = getHeaderValue(msg, "X-Priority").map(v => Priority.withRfcValue(v))
  // A non-numeric Sensitivity header is treated as absent rather than aborting the whole message
  val sensitivity: Option[Sensitivity] = getHeaderValue(msg, "Sensitivity")
    .flatMap(v => scala.util.Try(v.trim.toInt).toOption)
    .flatMap(id => Sensitivity.withPstIdOption(id))
  val inReplyTo: List[String] = getHeaderValue(msg, "In-Reply-To").toList.flatMap(Email.cleanInReplyTo)
  val references: List[String] = getHeaderValue(msg, "References").toList.flatMap(Email.cleanInReplyTo)

  // Recipients without an address are skipped
  val recipients: List[Recipient] = msg.getRecipients.asScala
    .flatMap(r => Option(r.getAddress).map(e => Recipient(Option(r.getName), e)))
    .toList

  val attachments = msg.getOutlookAttachments.asScala
  val msgAttachments = attachments.collect { case m: OutlookMsgAttachment => m }
  val fileAttachments = attachments.collect { case f: OutlookFileAttachment => f }

  val body = msg.getBodyText
  // Only file attachments that carry a content id are offered for inlining into the HTML body
  val html = Option(msg.getBodyHTML).map { msgHtml =>
    Email.inlineAttachmentsIntoHtml(msgHtml, fileAttachments.iterator) { a =>
      Option(a.getContentId).map { id =>
        (a.getMimeTag, id.removeChevrons(), new ByteArrayInputStream(a.getData))
      }
    }
  }

  // File attachments whose content disposition starts with "inline" are not counted
  val attachmentCount = msgAttachments.length + fileAttachments.count { attachment =>
    Option(attachment.getContentDisposition).forall(!_.startsWith("inline"))
  }

  val email = Email.createFrom(uri, from, recipients, sentAt, sensitivity, priority, subject, body, inReplyTo, references, html, attachmentCount)
  val context = IngestionContextBuilder(blob.uri, params).finishWithEmail(email)
  ingestionServices.ingestEmail(context, "application/vnd.ms-outlook")

  val attachmentBuilder = IngestionContextBuilder(email.uri, params)

  msgAttachments.foreach { m =>
    processMessage(blob, m.getOutlookMessage, params)
  }

  fileAttachments.foreach { attachment =>
    val attachmentStream = new ByteArrayInputStream(attachment.getData)
    val workingDir = scratch.createWorkingDir(s"emails/${email.uri.value}/")

    try {
      // The file name comes from the email and is untrusted: it may be null or contain
      // path separators / "..". Keep only the final path component so the file always
      // lands inside workingDir, and fall back to a fixed name when nothing usable remains.
      val safeName = Option(attachment.getLongFilename)
        .map(_.replace('\\', '/'))
        .map(name => name.substring(name.lastIndexOf('/') + 1).trim)
        .filterNot(name => name.isEmpty || name == "." || name == "..")
        .getOrElse("attachment")

      // Create Blob URI
      val localPath = workingDir.resolve(safeName)
      val attachmentFile = scratch.copyToScratchSpace(localPath, attachmentStream)
      val blobUri = Uri(FingerprintServices.createFingerprintFromFile(attachmentFile))

      // Prefer the MIME tag stored in the message, otherwise detect it from the file contents
      val mimeType = Option(attachment.getMimeTag)
        .getOrElse(tika.detectType(attachmentFile.toPath).map(_.toString)
          .getOrElse(throw new Exception("Failed to get MIME type for attachment")))

      // Ingest
      val attachmentBlob = Blob(blobUri, attachmentFile.length(), Set(MimeType(mimeType)))
      val attachmentContext = attachmentBuilder.finishWithFile(attachmentFile.toPath)
      ingestionServices.ingestFile(attachmentContext, attachmentBlob.uri, attachmentFile.toPath)
    } finally {
      attachmentStream.close()
      FileUtils.deleteDirectory(workingDir.toFile)
    }
  }
}