def parseMessage()

in backend/app/extraction/email/eml/EmlParser.scala [24:94]


  /**
    * Builds an [[Email]] from a parsed MIME message, together with the MIME parts that are
    * treated as its attachments.
    *
    * Three shapes of message content are understood:
    *  - multipart: the parts are flattened; the first unnamed `text/plain` part becomes the body
    *    (falling back to the plain-text rendering of the first unnamed `text/html` part), and
    *    named base64 parts become the attachments;
    *  - a plain string: used directly as the body, with no attachments;
    *  - a raw stream: wrapped as a single attachment with a placeholder body.
    *
    * @param message the message to convert
    * @return the email and its attachment parts, or `None` when the message content is of a
    *         type this parser does not handle
    */
  def parseMessage(message: Message): Option[(Email, Seq[MimeBodyPart])] = {
    // Textual content is only usable when it was handed back as a String; any other
    // representation is ignored rather than blindly cast (which would throw ClassCastException).
    def textContent(part: MimeBodyPart): Option[String] = part.getContent match {
      case text: String => Some(text)
      case _ => None
    }

    // Content types are ASCII tokens, so lower-case them independently of the JVM default locale
    // (under a Turkish locale "TEXT/PLAIN".toLowerCase() does not yield "text/plain").
    def hasContentType(part: MimeBodyPart, prefix: String): Boolean =
      part.getContentType.toLowerCase(java.util.Locale.ROOT).startsWith(prefix)

    val uri = getMessageUri(message)

    // Only internet-style addresses can be turned into a Recipient; other address kinds
    // (and null entries) are skipped instead of being force-cast.
    val senderAddress = Option(message.getFrom)
      .flatMap(_.headOption)
      .collect { case addr: InternetAddress => addr }
    val from = senderAddress.map { addr => Recipient(Option(addr.getPersonal), addr.getAddress) }

    val sentAt = Option(message.getHeader("Date")).flatMap(_.headOption.flatMap(DateTimeUtils.rfc1123ToIsoDateString))

    // The subject is handed on as null when the message has none.
    val subject = Option(message.getSubject).map(MimeUtility.decodeText).orNull

    val priority: Option[String] = Option(message.getHeader("X-Priority")).flatMap(_.headOption.map(v => Priority.withRfcValue(v)))
    val sensitivity: Option[Sensitivity] = Option(message.getHeader("Sensitivity")).flatMap(_.headOption.flatMap(v => Sensitivity.withRfcOption(v)))

    val inReplyTo: List[String] = Option(message.getHeader("In-Reply-To")).map(_.toList).getOrElse(Nil)
    val references: List[String] = Option(message.getHeader("References")).map(_.toList).getOrElse(Nil)
    val recipients: List[Recipient] = Option(message.getAllRecipients).map(_.toList).getOrElse(Nil)
      .collect { case c: InternetAddress => c }
      .flatMap { r => Option(r.getAddress).map(Recipient(Option(r.getPersonal), _)) }

    message.getContent match {
      case content: MimeMultipart =>
        val parts = (for (a <- 0 until content.getCount) yield content.getBodyPart(a))
          .collect { case p: MimeBodyPart => p }
          .flatMap(flattenMultipart)

        // NOTE(review): a part that has a filename but is not base64-encoded ends up in neither
        // list below and is therefore dropped - confirm that this is intended.
        val attachments = parts.filter(p => Option(p.getEncoding).exists(_.equalsIgnoreCase("base64")) && getFilename(p).nonEmpty)
        val nonAttachments = parts.filter(p => getFilename(p).isEmpty)

        val bodyPart = nonAttachments.find(hasContentType(_, "text/plain"))
        val htmlPart = nonAttachments.find(hasContentType(_, "text/html"))

        val plainText: Option[String] = bodyPart.flatMap(textContent)
        val rawHtml: Option[String] = htmlPart.flatMap(textContent)

        // Prefer the plain-text part; otherwise derive the body from the HTML part.
        val body = plainText
          .orElse(rawHtml.map(markup => HtmlToPlainText.convert(markup)))
          .getOrElse("")

        val html: Option[String] = rawHtml
          .map(Email.inlineAttachmentsIntoHtml(_, attachments.iterator)(a =>
            Option(a.getContentID).map { id =>
              (a.getContentType, id.removeChevrons(), a.getInputStream)
            }
          ))

        // Attachments whose disposition starts with "inline" are not counted.
        val attachmentCount = attachments.flatMap(getRawContentDisposition).count(!_.startsWith("inline"))
        val email = Email.createFrom(uri, from, recipients, sentAt, sensitivity, priority, subject, body, inReplyTo, references, html, attachmentCount)

        Some((email, attachments))

      case plainText: String =>
        val email = Email.createFrom(uri, from, recipients, sentAt, sensitivity, priority, subject, plainText, inReplyTo, references, None, 0)
        Some((email, Nil))

      case is: InputStream =>
        // Just a single attachment, no message text body

        // Read the content up front and always release the stream, even if reading fails.
        val bytes = try IOUtils.toByteArray(is) finally is.close()

        val headers = new InternetHeaders()
        headers.addHeader("Content-Type", message.getContentType)
        // The disposition is optional, so only copy it when the message declares one.
        // NOTE(review): getDisposition appears to yield only the disposition type, so a
        // filename parameter would not be carried over to the attachment - TODO confirm.
        Option(message.getDisposition).foreach(disposition => headers.addHeader("Content-Disposition", disposition))

        val email = Email.createFrom(uri, from, recipients, sentAt, sensitivity, priority, subject, "<empty>", inReplyTo, references, None, 0)
        val attachment = new MimeBodyPart(headers, bytes)

        Some((email, Seq(attachment)))

      case other =>
        // Guard against null content so that logging the type cannot itself throw.
        val contentType = Option(other).map(_.getClass.toString).getOrElse("null")
        logger.info(s"Unknown EML message content type $contentType")
        None
    }
  }