in backend/app/extraction/email/eml/EmlParser.scala [24:94]
/**
  * Converts a mail message into an [[Email]] plus the MIME parts to be treated as attachments.
  *
  * The outcome depends on what `message.getContent` returns:
  *  - a `MimeMultipart`: the parts are flattened; the first `text/plain` part without a filename
  *    becomes the body (falling back to the `text/html` part converted to plain text), and the
  *    HTML part, if any, is kept with its attachments inlined;
  *  - a `String`: it is used as the body and there are no attachments;
  *  - an `InputStream`: the message is a single attachment with no text body;
  *  - anything else: the type is logged and `None` is returned.
  *
  * @param message the message to convert
  * @return the email and its attachment parts, or `None` when the content type is not recognised
  */
def parseMessage(message: Message): Option[(Email, Seq[MimeBodyPart])] = {
  // Header names and MIME tokens are ASCII, so lower-case them independently of the JVM default locale.
  def lower(value: String): String = value.toLowerCase(java.util.Locale.ROOT)

  // getHeader returns null when the header is absent, hence the Option wrapper.
  def headerValues(name: String): List[String] = Option(message.getHeader(name)).map(_.toList).getOrElse(Nil)

  def firstHeader(name: String): Option[String] = headerValues(name).headOption

  // Reads a part's content as text. Content that is not a String is logged and skipped
  // rather than cast, so a single odd part cannot abort the whole parse with a ClassCastException.
  def textOf(part: MimeBodyPart): Option[String] = part.getContent match {
    case text: String => Some(text)
    case other =>
      logger.info(s"Unexpected content ${Option(other).map(_.getClass)} in ${part.getContentType} part")
      None
  }

  val uri = getMessageUri(message)

  // Only the first From address is used, and only when it is an InternetAddress
  // (mirrors the type-safe handling of the recipients below).
  val senderAddress = Option(message.getFrom).flatMap(_.headOption.collect { case addr: InternetAddress => addr })
  val from = senderAddress.map { addr => Recipient(Option(addr.getPersonal), addr.getAddress) }

  val sentAt = firstHeader("Date").flatMap(DateTimeUtils.rfc1123ToIsoDateString)
  val subject = Option(message.getSubject).map(MimeUtility.decodeText).orNull
  val priority: Option[String] = firstHeader("X-Priority").map(v => Priority.withRfcValue(v))
  val sensitivity: Option[Sensitivity] = firstHeader("Sensitivity").flatMap(v => Sensitivity.withRfcOption(v))
  val inReplyTo: List[String] = headerValues("In-Reply-To")
  val references: List[String] = headerValues("References")

  // Recipients that are not InternetAddresses, or that have no address, are dropped.
  val recipients: List[Recipient] = Option(message.getAllRecipients).map(_.toList).getOrElse(Nil)
    .collect { case c: InternetAddress => c }
    .flatMap { r => Option(r.getAddress).map(Recipient(Option(r.getPersonal), _)) }

  message.getContent match {
    case content: MimeMultipart =>
      val parts = (for (a <- 0 until content.getCount) yield content.getBodyPart(a))
        .collect { case p: MimeBodyPart => p }
        .flatMap(flattenMultipart)

      // Only named parts whose transfer encoding is base64 count as attachments; a named part
      // with any other encoding ends up in neither list.
      // NOTE(review): confirm that dropping non-base64 named parts is intended.
      val attachments = parts.filter(p => Option(p.getEncoding).exists(lower(_) == "base64") && getFilename(p).nonEmpty)
      val nonAttachments = parts.filter(p => getFilename(p).isEmpty)

      val bodyPart = nonAttachments.find(p => lower(p.getContentType).startsWith("text/plain"))
      val htmlPart = nonAttachments.find(p => lower(p.getContentType).startsWith("text/html"))

      // Each part is decoded exactly once; the HTML text serves both as the body fallback
      // and as the source for the inlined HTML below.
      val plainText = bodyPart.flatMap(textOf)
      val htmlText = htmlPart.flatMap(textOf)

      val body = plainText
        .orElse(htmlText.map(source => HtmlToPlainText.convert(source)))
        .getOrElse("")

      val html: Option[String] = htmlText.map { source =>
        Email.inlineAttachmentsIntoHtml(source, attachments.iterator)(a =>
          Option(a.getContentID).map { id =>
            (a.getContentType, id.removeChevrons(), a.getInputStream)
          }
        )
      }

      // Disposition types are case-insensitive, so "Inline" must not be counted as a real attachment.
      val attachmentCount = attachments
        .flatMap(getRawContentDisposition)
        .count(disposition => !lower(disposition.trim).startsWith("inline"))

      val email = Email.createFrom(uri, from, recipients, sentAt, sensitivity, priority, subject, body, inReplyTo, references, html, attachmentCount)
      Some((email, attachments))

    case plainText: String =>
      val email = Email.createFrom(uri, from, recipients, sentAt, sensitivity, priority, subject, plainText, inReplyTo, references, None, 0)
      Some((email, Nil))

    case is: InputStream =>
      // Just a single attachment, no message text body
      val headers = new InternetHeaders()
      headers.addHeader("Content-Type", message.getContentType)
      // Copy the raw header rather than getDisposition: the latter yields only the disposition
      // type (losing parameters such as the filename) and is null when the header is absent.
      firstHeader("Content-Disposition").foreach(value => headers.addHeader("Content-Disposition", value))

      val email = Email.createFrom(uri, from, recipients, sentAt, sensitivity, priority, subject, "<empty>", inReplyTo, references, None, 0)

      // Close the content stream once it has been fully read.
      val bytes = try IOUtils.toByteArray(is) finally is.close()
      val attachment = new MimeBodyPart(headers, bytes)
      Some((email, Seq(attachment)))

    case other =>
      logger.info(s"Unknown EML message content type ${other.getClass}")
      None
  }
}