nsresult nsParseMailMessageState::ParseHeaders()

in mailnews/local/src/nsParseMailbox.cpp [507:751]


/**
 * Walks the raw header text held in m_headers and records, for every header
 * this parser cares about, where its value lives and how long it is.
 *
 * The header text is rewritten in place while scanning: folded continuation
 * lines are collapsed so that each fold becomes a single space, the bytes
 * freed by that compaction are zeroed, and every value is NUL-terminated.
 * The HeaderData entries filled in here point straight into that buffer.
 *
 * @return NS_OK when the buffer is empty or once scanning has finished
 *         (scanning stops at the first position from which no further ':'
 *         can be found); NS_ERROR_UNEXPECTED when the buffer is a single
 *         byte long or does not end in CR or LF.
 */
nsresult nsParseMailMessageState::ParseHeaders() {
  char* cursor = m_headers.begin();
  const uint32_t totalLength = m_headers.length();
  if (totalLength == 0) {
    // Nothing was collected. Report success anyway so the message still
    // appears in the summary and can be opened and deleted.
    return NS_OK;
  }

  char* const end = cursor + totalLength;
  const bool endsInNewline =
      totalLength > 1 &&
      (cursor[totalLength - 1] == '\r' || cursor[totalLength - 1] == '\n');
  if (!endsInNewline) {
    NS_WARNING("Header text should always end in a newline");
    return NS_ERROR_UNEXPECTED;
  }

  while (cursor < end) {
    char* const colon = PL_strnchr(cursor, ':', end - cursor);
    // Scratch slot for a Received: header; it only lives for this iteration.
    HeaderData receivedBy;

    if (!colon) {
      break;
    }

    // Header name, lower-cased so the literal comparisons below can match.
    nsDependentCSubstring name(cursor, colon);
    ToLowerCase(name);

    // First() asserts on an empty string, so an empty name dispatches on the
    // colon itself, which matches none of the cases.
    const char initial = name.IsEmpty() ? *colon : name.First();

    // RFC 5322 section 3.6 gives the allowed count for each header. Where a
    // "!....length" guard is present, a value that was already stored wins
    // over later duplicates.
    HeaderData* target = nullptr;
    switch (initial) {
      case 'b':
        if (name.EqualsLiteral("bcc") && !m_bccList.length) {
          target = &m_bccList;
        }
        break;
      case 'c':
        if (name.EqualsLiteral("cc")) {
          // XXX: RFC 5322 says it's 0 or 1, yet every occurrence is kept.
          target = m_ccList.AppendElement(HeaderData());
        } else if (name.EqualsLiteral("content-type")) {
          target = &m_content_type;
        }
        break;
      case 'd':
        if (name.EqualsLiteral("date") && !m_date.length) {
          target = &m_date;
        } else if (name.EqualsLiteral("disposition-notification-to")) {
          target = &m_mdn_dnt;
        } else if (name.EqualsLiteral("delivery-date")) {
          target = &m_delivery_date;
        }
        break;
      case 'f':
        if (name.EqualsLiteral("from") && !m_from.length) {
          target = &m_from;
        }
        break;
      case 'i':
        if (name.EqualsLiteral("in-reply-to") && !m_in_reply_to.length) {
          target = &m_in_reply_to;
        }
        break;
      case 'm':
        if (name.EqualsLiteral("message-id") && !m_message_id.length) {
          target = &m_message_id;
        }
        break;
      case 'n':
        if (name.EqualsLiteral("newsgroups")) {
          target = &m_newsgroups;
        }
        break;
      case 'o':
        if (name.EqualsLiteral("original-recipient")) {
          target = &m_mdn_original_recipient;
        }
        break;
      case 'p':
        // Several priority-style headers may occur in one message. There is
        // no "already set" guard here, so the last one seen is the one that
        // sticks. The same slot is shared with x-priority below.
        if (name.EqualsLiteral("priority")) {
          target = &m_priority;
        }
        break;
      case 'r':
        if (name.EqualsLiteral("references") && !m_references.length) {
          target = &m_references;
        } else if (name.EqualsLiteral("return-path")) {
          target = &m_return_path;
        } else if (name.EqualsLiteral("return-receipt-to")) {
          // The conventional Return-Receipt-To is handled like the MDN
          // Disposition-Notification-To header.
          target = &m_mdn_dnt;
        } else if (name.EqualsLiteral("reply-to") && !m_replyTo.length) {
          target = &m_replyTo;
        } else if (name.EqualsLiteral("received")) {
          target = &receivedBy;
        }
        break;
      case 's':
        if (name.EqualsLiteral("subject") && !m_subject.length) {
          target = &m_subject;
        } else if (name.EqualsLiteral("sender") && !m_sender.length) {
          target = &m_sender;
        } else if (name.EqualsLiteral("status")) {
          target = &m_status;
        }
        break;
      case 't':
        if (name.EqualsLiteral("to")) {
          // XXX: RFC 5322 says it's 0 or 1, yet every occurrence is kept.
          target = m_toList.AppendElement(HeaderData());
        }
        break;
      case 'x':
        if (name.EqualsIgnoreCase(X_MOZILLA_STATUS2) && !m_mozstatus2.length) {
          target = &m_mozstatus2;
        } else if (name.EqualsIgnoreCase(X_MOZILLA_STATUS) &&
                   !m_mozstatus.length) {
          target = &m_mozstatus;
        } else if (name.EqualsIgnoreCase(HEADER_X_MOZILLA_ACCOUNT_KEY) &&
                   !m_account_key.length) {
          target = &m_account_key;
        } else if (name.EqualsLiteral("x-priority")) {
          // Shares m_priority with "priority"; see case 'p'.
          target = &m_priority;
        } else if (name.EqualsIgnoreCase(HEADER_X_MOZILLA_KEYWORDS) &&
                   !m_keywords.length) {
          target = &m_keywords;
        }
        break;
    }

    // Nothing built-in claimed this header: try the custom DB header list.
    if (!target && m_customDBHeaders.Length()) {
      MOZ_ASSERT(m_customDBHeaders.Length() == m_customDBHeaderData.Length(),
                 "m_customDBHeaderData should be in sync.");
      const size_t customIndex = m_customDBHeaders.IndexOf(name);
      if (customIndex != nsTArray<nsCString>::NoIndex) {
        target = &m_customDBHeaderData[customIndex];
      }
    }

    // Everything after the colon is the raw value. While unfolding, bytes are
    // shuffled towards the front; writePos is where the next kept byte goes
    // and never runs ahead of cursor.
    char* const valueStart = colon + 1;
    cursor = valueStart;
    char* writePos = valueStart;

    bool sawFold;
    do {
      // Copy the current physical line up to (not including) its line break.
      while (cursor < end && *cursor != '\r' && *cursor != '\n') {
        if (writePos != cursor) {
          *writePos = *cursor;
        }
        ++cursor;
        ++writePos;
      }

      // A fold is CRLF, or a lone CR or LF, followed by a space or a tab.
      sawFold = (cursor + 2 < end && (cursor[0] == '\r' && cursor[1] == '\n') &&
                 (cursor[2] == ' ' || cursor[2] == '\t')) ||
                (cursor + 1 < end && (cursor[0] == '\r' || cursor[0] == '\n') &&
                 (cursor[1] == ' ' || cursor[1] == '\t'));
      if (sawFold) {
        // Replace any blanks already written plus the fold with one space.
        // The back-scan cannot pass the ':' that sits just before valueStart.
        while (writePos[-1] == ' ' || writePos[-1] == '\t') {
          --writePos;
        }
        *writePos++ = ' ';

        // Step over CRLF, or over CR/LF plus the first blank ...
        cursor += 2;

        // ... and then over the rest of the continuation line's indent.
        while (cursor < end && (*cursor == ' ' || *cursor == '\t')) {
          ++cursor;
        }

        // Running off the end here means the headers finished with a line of
        // nothing but blanks. The termination code below needs cursor to sit
        // on a line break, so step back onto the last byte.
        if (cursor == end) {
          --cursor;
          MOZ_ASSERT(*cursor == '\n' || *cursor == '\r',
                     "Header text should always end in a newline.");
        }
      }
    } while (sawFold);

    // Zero the gap that unfolding opened between the compacted value and the
    // line break.
    if (writePos < cursor) {
      memset(writePos, '\0', cursor - writePos);
    }

    if (target) {
      // Skip the blanks that directly follow the colon.
      char* text = valueStart;
      while (text < writePos && (*text == ' ' || *text == '\t')) {
        ++text;
      }

      const int32_t textLength = writePos - text;
      if (textLength < 0) {
        target->length = 0;
        target->value = nullptr;
      } else {
        target->length = textLength;
        target->value = text;
      }
    }

    if (*cursor == '\r' || *cursor == '\n') {
      char* const lineBreak = cursor;
      // Consume CRLF as one unit, otherwise a single CR or LF.
      cursor += (*cursor == '\r' && cursor + 1 < end && cursor[1] == '\n') ? 2
                                                                           : 1;
      // Zero the first line-break byte so leftovers don't confuse message
      // filters, and NUL-terminate the value where the compacted text ends.
      *lineBreak = 0;
      *writePos = 0;
    }

    if (target) {
      // Drop trailing whitespace by pulling the terminator backwards. The
      // stored pointer is const, hence the cast to write through it.
      while (target->length > 0 &&
             IS_SPACE(target->value[target->length - 1])) {
        const_cast<char*>(target->value)[--target->length] = 0;
      }

      if (target == &receivedBy) {
        if (m_receivedTime == 0) {
          // Take the date from a Received: header only while no received
          // time has been stored yet. The first one is closest to the
          // recipient and therefore the least likely to be spoofed.
          nsAutoCString receivedHdr(target->value, target->length);
          const int32_t semicolonAt = receivedHdr.RFindChar(';');
          if (semicolonAt != -1) {
            // The timestamp is whatever follows the final ';'.
            nsAutoCString dateText;
            dateText = Substring(receivedHdr, semicolonAt + 1);
            dateText.Trim(" \t\b\r\n");
            PRTime parsed;
            if (PR_ParseTimeString(dateText.get(), false, &parsed) !=
                PR_SUCCESS) {
              NS_WARNING("PR_ParseTimeString failed in ParseHeaders().");
            } else {
              m_receivedTime = parsed;
            }
          }
        }

        // When "received" is listed as a custom DB header, accumulate every
        // Received: value, separated by single spaces.
        if (m_customDBHeaders.Length() &&
            m_customDBHeaders.Contains("received"_ns)) {
          if (!m_receivedValue.IsEmpty()) {
            m_receivedValue.Append(' ');
          }
          m_receivedValue.Append(target->value, target->length);
        }
      }

      MOZ_ASSERT(target->value[target->length] == 0,
                 "Non-null-terminated strings cause very, very bad problems");
    }
  }
  return NS_OK;
}