private def getHeaderValue()

in backend/app/extraction/email/msg/MsgEmailExtractor.scala [32:102]


  private def getHeaderValue(message: OutlookMessage, header: String): Option[String] = Option(message.getHeaders)
    .flatMap(_.lines.collect(Collectors.toList[String]).asScala.find(_.startsWith(s"$header:")))
    .map(_.stripPrefix(s"$header:").trim)
    .filter(!_.isEmpty)

  private def processMessage(blob: Blob, msg: OutlookMessage, params: ExtractionParams): Unit = {
    val uri = msg.getMessageId.hasTextOrNone().map(id => Uri(Email.cleanUri(id)))
    val from = Option(msg.getFromEmail).map(e => Recipient(Option(msg.getFromName), e))
    val sentAt = getHeaderValue(msg, "Date").flatMap(DateTimeUtils.rfc1123ToIsoDateString)
    val subject = Option(msg.getSubject).getOrElse("")

    val priority: Option[String] = getHeaderValue(msg, "X-Priority").map(v => Priority.withRfcValue(v))
    val sensitivity: Option[Sensitivity] = getHeaderValue(msg, "Sensitivity").flatMap(v => Sensitivity.withPstIdOption(v.toInt))

    val inReplyTo: List[String] = getHeaderValue(msg, "In-Reply-To").toList.flatMap(Email.cleanInReplyTo)
    val references: List[String] = getHeaderValue(msg, "References").toList.flatMap(Email.cleanInReplyTo)
    val recipients: List[Recipient] = msg.getRecipients.asScala
      .flatMap(r => Option(r.getAddress)
        .map(e => Recipient(Option(r.getName), e))).toList

    val attachments = msg.getOutlookAttachments.asScala
    val msgAttachments = attachments.collect { case m: OutlookMsgAttachment => m}
    val fileAttachments = attachments.collect { case f: OutlookFileAttachment => f }

    val body = msg.getBodyText
    val html = Option(msg.getBodyHTML).map(msgHtml => Email.inlineAttachmentsIntoHtml(msgHtml, fileAttachments.iterator)(a =>
      Option(a.getContentId).map { id =>
        (a.getMimeTag, id.removeChevrons(), new ByteArrayInputStream(a.getData))
      }
    ))

    val attachmentCount = msgAttachments.length + fileAttachments.count { attachment =>
      Option(attachment.getContentDisposition).forall(!_.startsWith("inline"))
    }

    val email = Email.createFrom(uri, from, recipients, sentAt, sensitivity, priority, subject, body, inReplyTo, references, html, attachmentCount)

    val context = IngestionContextBuilder(blob.uri, params).finishWithEmail(email)
    ingestionServices.ingestEmail(context, "application/vnd.ms-outlook")

    val attachmentBuilder = IngestionContextBuilder(email.uri, params)

    msgAttachments.foreach { m =>
      processMessage(blob, m.getOutlookMessage, params)
    }

    fileAttachments.foreach { attachment =>
      val attachmentStream = new ByteArrayInputStream(attachment.getData)
      val workingDir = scratch.createWorkingDir(s"emails/${email.uri.value}/")

      try {
        // Create Blob URI
        val localPath = workingDir.resolve(attachment.getLongFilename)
        val attachmentFile = scratch.copyToScratchSpace(localPath, attachmentStream)
        val blobUri = Uri(FingerprintServices.createFingerprintFromFile(attachmentFile))

        val mimeType = Option(attachment.getMimeTag)
          .getOrElse(tika.detectType(attachmentFile.toPath).map(_.toString)
            .getOrElse(throw new Exception("Failed to get MIME type for attachment")))

        // Ingest
        val blob = Blob(blobUri, attachmentFile.length(), Set(MimeType(mimeType)))

        val attachmentContext = attachmentBuilder.finishWithFile(attachmentFile.toPath)
        ingestionServices.ingestFile(attachmentContext, blob.uri, attachmentFile.toPath)
      } finally {
        attachmentStream.close()
        FileUtils.deleteDirectory(workingDir.toFile)
      }
    }
  }