hphp/runtime/ext/mailparse/mime.cpp

/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com)  |
   | Copyright (c) 1997-2010 The PHP Group                                |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
*/

#include "hphp/runtime/ext/mailparse/mime.h"
#include "hphp/runtime/ext/stream/ext_stream.h"
#include "hphp/runtime/base/array-init.h"
#include "hphp/runtime/base/array-iterator.h"
#include "hphp/runtime/base/builtin-functions.h"
#include "hphp/runtime/base/mem-file.h"
#include "hphp/runtime/base/runtime-error.h"

#define MAXLEVELS 20
#define MAXPARTS  300

namespace HPHP {
///////////////////////////////////////////////////////////////////////////////

/* Construct an empty header: no value, no attributes. */
MimePart::MimeHeader::MimeHeader()
  : m_empty(false ? false : true) {
}

/* Construct a header that carries only a main value (copied from `value`)
 * and an empty attribute dictionary. `value` must not be null. */
MimePart::MimeHeader::MimeHeader(const char *value)
  : m_empty(false) {
  assertx(value);
  m_attributes = Array::CreateDict();
  m_value = String(value, CopyString);
}

/* Construct a header from a tokenized "Name: value; attr=val; ..." line.
 *
 * Tokens 0 and 1 are the header name and the ':' separator, so parsing
 * starts at token 2. The text up to the first ';' becomes the lower-cased
 * main value; every following "name = value" group becomes an entry in
 * m_attributes. RFC 2231 continuations ("name*0", "name*1") and
 * charset-encoded segments ("name*", "name*0*") are accumulated in
 * value_buf and stored under the bare attribute name. */
MimePart::MimeHeader::MimeHeader(php_rfc822_tokenized_t *toks)
  : m_empty(false) {
  int i, first_semi, next_semi, comments_before_semi, netscape_bug = 0;
  String name_buf;
  StringBuffer value_buf;
  bool is_rfc2231_name = false;
  int charset_p = 0, prevcharset_p = 0;
  bool namechanged = false, currentencoded = false;

  m_attributes = Array::CreateDict();

  /* php_rfc822_print_tokens(toks); */

  /* look for optional ; which separates optional attributes from the main
   * value */
  for (first_semi = 2; first_semi < toks->ntokens; first_semi++) {
    if (toks->tokens[first_semi].token == ';') break;
  }

  m_value = String(php_rfc822_recombine_tokens
                   (toks, 2, first_semi - 2,
                    PHP_RFC822_RECOMBINE_STRTOLOWER |
                    PHP_RFC822_RECOMBINE_IGNORE_COMMENTS), AttachString);

  if (first_semi < toks->ntokens) first_semi++;

  /* Netscape Bug: Messenger sometimes omits the semi when wrapping the
   * the header.
   * That means we have to be even more clever than the spec says that
   * we need to :-/
   * */
  while (first_semi < toks->ntokens) {
    /* find the next ; */
    comments_before_semi = 0;
    for (next_semi = first_semi; next_semi < toks->ntokens; next_semi++) {
      if (toks->tokens[next_semi].token == ';') break;
      if (toks->tokens[next_semi].token == '(') comments_before_semi++;
    }

    i = first_semi;
    if (i < next_semi) {
      i++;

      /* ignore comments */
      while (i < next_semi && toks->tokens[i].token == '(') {
        i++;
      }

      if (i < next_semi && toks->tokens[i].token == '=') {
        /* Here, first_semi --> "name" and i --> "=", so skip "=" sign */
        i++;

        /* count those tokens; we expect "token = token" (3 tokens); if there
         * are more than that, then something is quite possibly wrong
         * - Netscape Bug! */
        if (next_semi < toks->ntokens &&
            toks->tokens[next_semi].token != ';' &&
            next_semi - first_semi - comments_before_semi > 3) {
          next_semi = i + 1;
          netscape_bug = 1;
        }

        String name(php_rfc822_recombine_tokens
                    (toks, first_semi, 1,
                     PHP_RFC822_RECOMBINE_STRTOLOWER |
                     PHP_RFC822_RECOMBINE_IGNORE_COMMENTS), AttachString);
        String value(php_rfc822_recombine_tokens
                     (toks, i, next_semi - i,
                      PHP_RFC822_RECOMBINE_IGNORE_COMMENTS), AttachString);

        /* support rfc2231 mime parameter value
         *
         * Parameter Value Continuations:
         *
         * Content-Type: message/external-body; access-type=URL;
         *   URL*0="ftp://";
         *   URL*1="cs.utk.edu/pub/moore/bulk-mailer/bulk-mailer.tar"
         *
         * is semantically identical to
         *
         * Content-Type: message/external-body; access-type=URL;
         *   URL="ftp://cs.utk.edu/pub/moore/bulk-mailer/bulk-mailer.tar"
         *
         * Original rfc2231 support by IceWarp Ltd. <info@icewarp.com>
         */
        const char *star = strchr(name.data(), '*');
        if (star) {
          currentencoded = true;

          /* Is last char * - charset encoding */
          charset_p = (name[name.size() - 1] == '*');

          /* Leave only attribute name without *.
           * Build a new, correctly sized string instead of poking a NUL
           * into the existing buffer: String compares and hashes by its
           * stored length, so an in-place NUL would leave the "*N" suffix
           * in every comparison and in the attribute key. */
          name = String(name.data(), star - name.data(), CopyString);

          /* New item or continuous */
          if (name_buf.isNull()) {
            namechanged = false;
            name_buf = name;
          } else {
            namechanged = (name_buf != name);
            if (!namechanged) {
              name.clear();
            }
          }

          /* Check if name changed*/
          if (!namechanged) {
            /* Append string to buffer - check if to be encoded...  */
            rfc2231_to_mime(value_buf, (char*)value.data(), charset_p,
                            prevcharset_p);

            /* Mark previous */
            prevcharset_p = charset_p;
          }

          is_rfc2231_name = true;
        }

        /* Last item was encoded */
        if (is_rfc2231_name) {
          /* Name not null and name differs with new name*/
          if (!name.empty() && name_buf != name) {
            /* Finalize packet */
            rfc2231_to_mime(value_buf, nullptr, 0, prevcharset_p);

            auto const name_key =
              m_attributes.convertKey<IntishCast::Cast>(name_buf);
            auto str = value_buf.detach();
            m_attributes.set(name_key, make_tv<KindOfString>(str.get()));
            value_buf.clear();

            prevcharset_p = 0;
            is_rfc2231_name = false;
            name_buf.clear();

            /* New non encoded name*/
            if (!currentencoded) {
              /* Add string*/
              auto const updated_name_key =
                m_attributes.convertKey<IntishCast::Cast>(name);
              m_attributes.set(updated_name_key,
                               make_tv<KindOfString>(value.get()));
            } else {
              /* Encoded name changed*/
              if (namechanged) {
                /* Append string to buffer - check if to be encoded...  */
                rfc2231_to_mime(value_buf, (char*)value.data(), charset_p,
                                prevcharset_p);

                /* Mark */
                is_rfc2231_name = true;
                name_buf = name;
                prevcharset_p = charset_p;
              }
            }

            namechanged = false;
          }
        } else {
          auto const name_key =
            m_attributes.convertKey<IntishCast::Cast>(name);
          m_attributes.set(name_key, make_tv<KindOfString>(value.get()));
        }
      }
    }

    /* step over the ';' unless the Netscape workaround already positioned
     * next_semi on the start of the next attribute */
    if (next_semi < toks->ntokens && !netscape_bug) {
      next_semi++;
    }

    first_semi = next_semi;
    netscape_bug = 0;
  }

  if (is_rfc2231_name) {
    /* Finalize packet */
    rfc2231_to_mime(value_buf, nullptr, 0, prevcharset_p);

    auto const name_key = m_attributes.convertKey<IntishCast::Cast>(name_buf);
    auto str = value_buf.detach();
    m_attributes.set(name_key, make_tv<KindOfString>(str.get()));
  }
}

/* Reset the header to the empty state. */
void MimePart::MimeHeader::clear() {
  m_empty = true;
  m_value.clear();
  m_attributes.reset();
}

/* Look up a single attribute by name in m_attributes. */
Variant MimePart::MimeHeader::get(const String& attrname) {
  auto const arrkey = m_attributes.convertKey<IntishCast::Cast>(attrname);
  return m_attributes[arrkey];
}

/* Copy this header into `ret`: each attribute is stored under
 * attrprefix + attribute-name, and the main value under valuelabel. */
void MimePart::MimeHeader::getAll(Array &ret, const String& valuelabel,
                                  const String& attrprefix) {
  for (ArrayIter iter(m_attributes); iter; ++iter) {
    String s = attrprefix + iter.first().toString();
    auto const arrkey = ret.convertKey<IntishCast::Cast>(s);
    ret.set(arrkey, iter.secondVal());
  }

  /* do this last so that a bogus set of headers like this:
   * Content-Type: multipart/related;
   *    boundary="----=_NextPart_00_0017_01C091F4.1B5EF6B0";
   *    type="text/html"
   *
   * doesn't overwrite content-type with the type="text/html"
   * value.
   * */
  auto const arrkey = ret.convertKey<IntishCast::Cast>(valuelabel);
  ret.set(arrkey, make_tv<KindOfString>(m_value.get()));
}

/* Append one RFC 2231 parameter segment to value_buf, converting a
 * charset-encoded segment (charset'language'%XX...) into a MIME
 * encoded-word ("=?charset?Q?=XX...").
 *
 * value          segment text; MODIFIED IN PLACE when charset_p is set.
 *                May be null, which only emits the closing "?=" when the
 *                previous segment was encoded (used to finalize a value).
 * charset_p      non-zero when this segment is charset-encoded.
 * prevcharset_p  non-zero when the previous segment was charset-encoded.
 */
void MimePart::MimeHeader::rfc2231_to_mime(StringBuffer &value_buf,
                                           char* value,
                                           int charset_p, int prevcharset_p) {
  char *strp, *startofvalue = nullptr;
  int quotes = 0;

  /* Process string, get positions and replace */
  /* Set to start of buffer*/
  if (charset_p && value) {
    /* Previous charset already set so only convert %nn to =nn*/
    if (prevcharset_p) quotes = 2;

    strp = value;
    while (*strp) {
      /* Quote handling*/
      if (*strp == '\'') {
        if (quotes <= 1) {
          /* End of charset*/
          if (quotes == 0) {
            *strp = 0;
          } else {
            startofvalue = strp + 1;
          }
          quotes++;
        }
      } else {
        /* Replace % with = - quoted printable*/
        if (*strp == '%' && quotes == 2) {
          *strp = '=';
        }
      }
      strp++;
    }
  }

  /* If first encoded token*/
  if (charset_p && !prevcharset_p && startofvalue) {
    value_buf.append("=?");
    value_buf.append(value);
    value_buf.append("?Q?");
    value_buf.append(startofvalue);
  }

  /* If last encoded token*/
  if (prevcharset_p && !charset_p) {
    value_buf.append("?=");
  }

  /* Append value*/
  if ((!charset_p || (prevcharset_p && charset_p)) && value) {
    value_buf.append(value);
  }
}

///////////////////////////////////////////////////////////////////////////////

/* A new part starts with zeroed positions and line counts, an empty header
 * dictionary, and the parser in header mode. */
MimePart::MimePart()
  : m_startpos(0), m_endpos(0), m_bodystart(0), m_bodyend(0),
    m_nlines(0), m_nbodylines(0) {
  m_headers = Array::CreateDict();

  /* begin in header parsing mode */
  m_parsedata.in_header = true;
  m_parsedata.is_dummy = false;
  m_parsedata.completed = false;
}

/////////////////////////////////////////////////////////////////////////////// // enumeration bool MimePart::enumeratePartsImpl(Enumerator *top, Enumerator **child, PFUNC_ENUMERATOR callback, void *ptr) { *child = NULL; if (!(this->*callback)(top, ptr)) return false; Enumerator next; *child = &next; next.id = 1; if (!strncasecmp(m_content_type.m_value.data(), "multipart/", 10)) { next.id = 0; } for (ArrayIter iter(m_children); iter; ++iter) { if (next.id) { auto childpart = cast<MimePart>(iter.second()); if (!childpart->enumeratePartsImpl(top, &next.next, callback, ptr)) { return false; } } next.id++; } return true; } void MimePart::enumerateParts(PFUNC_ENUMERATOR callback, void *ptr) { Enumerator top; top.id = 1; enumeratePartsImpl(&top, &top.next, callback, ptr); } struct find_part_struct { const char *searchfor; MimePart *foundpart; }; bool MimePart::getStructure(Enumerator *id, void *ptr) { char intbuf[16]; int len, i = 0; int buf_size = 1024; char *buf = (char*)malloc(buf_size); buf[0] = '\0'; while (id && i < buf_size) { sprintf(intbuf, "%d", id->id); len = strlen(intbuf); if (len > (buf_size-i)) { raise_warning("too many nested sections in message"); free(buf); return false; } if ((i + len + 1) >= buf_size) { buf_size = buf_size << 1; buf = (char*)realloc(buf, buf_size); if (!buf) { raise_fatal_error( folly::sformat("The structure buffer has been exceeded " "({}). Please try decreasing the nesting " "depth of messages and report this to the " "developers.", buf_size).c_str()); } } sprintf(&buf[i], "%s%c", intbuf, id->next ? '.' : '\0'); i += len + (id->next ? 
1 : 0); id = id->next; } ((Array*)ptr)->append(String(buf, AttachString)); return true; } Array MimePart::getStructure() { Array ret = Array::CreateDict(); enumerateParts(&MimePart::getStructure, &ret); return ret; } bool MimePart::findPart(Enumerator *id, void *ptr) { struct find_part_struct *find = (find_part_struct *)ptr; const unsigned char *num = (const unsigned char*)find->searchfor; unsigned int n; while (id) { if (!isdigit((int)*num)) return true; /* convert from decimal to int */ n = 0; while (isdigit((int)*num)) n = (n * 10) + (*num++ - '0'); if (*num) { if (*num != '.') return true; num++; } if (n != (unsigned int)id->id) return true; id = id->next; } if (*num == 0) find->foundpart = this; return true; } Resource MimePart::findByName(const char *name) { struct find_part_struct find; find.searchfor = name; find.foundpart = NULL; enumerateParts(&MimePart::findPart, &find); return Resource{find.foundpart}; } static int filter_into_work_buffer(int c, void *dat) { MimePart *part = (MimePart*)dat; return part->filter(c); } int MimePart::filter(int c) { char buf[] = {(char)c, '\0'}; m_parsedata.workbuf += buf; if (m_parsedata.workbuf.size() >= 4096) { (this->*m_extract_func)(m_parsedata.workbuf); m_parsedata.workbuf.clear(); } return c; } void MimePart::decoderPrepare(bool do_decode) { enum mbfl_no_encoding from = mbfl_no_encoding_8bit; if (do_decode && !m_content_transfer_encoding.empty()) { from = mbfl_name2no_encoding(m_content_transfer_encoding.data()); if (from == mbfl_no_encoding_invalid) { if (strcasecmp("binary", m_content_transfer_encoding.data()) != 0) { raise_warning("mbstring doesn't know how to decode %s " "transfer encoding!", m_content_transfer_encoding.data()); } from = mbfl_no_encoding_8bit; } } m_parsedata.workbuf.clear(); if (do_decode) { if (from == mbfl_no_encoding_8bit || from == mbfl_no_encoding_7bit) { m_extract_filter = NULL; } else { m_extract_filter = mbfl_convert_filter_new(from, mbfl_no_encoding_8bit, filter_into_work_buffer, NULL, 
this); } } } void MimePart::decoderFinish() { if (m_extract_filter) { mbfl_convert_filter_flush(m_extract_filter); mbfl_convert_filter_delete(m_extract_filter); } if (m_extract_func && !m_parsedata.workbuf.empty()) { (this->*m_extract_func)(m_parsedata.workbuf); m_parsedata.workbuf.clear(); } } void MimePart::decoderFeed(const String& str) { if (!str.empty()) { if (m_extract_filter) { for (int i = 0; i < str.size(); i++) { if (mbfl_convert_filter_feed(str[i], m_extract_filter) < 0) { raise_warning("filter conversion failed. Input message is " "probably incorrectly encoded"); return; } } } else { (this->*m_extract_func)(str); } } } const StaticString s_1_pt_0("1.0"); bool MimePart::isVersion1() { return m_mime_version == s_1_pt_0 || m_parent; } const StaticString s_headers("headers"), s_starting_pos("starting-pos"), s_starting_pos_body("starting-pos-body"), s_ending_pos("ending-pos"), s_ending_pos_body("ending-pos-body"), s_line_count("line-count"), s_body_line_count("body-line-count"), s_charset("charset"), s_transfer_encoding("transfer-encoding"), s_content_type("content-type"), s_content_("content-"), s_text_plain_error("text/plain, (error)"), s_content_disposition("content-disposition"), s_disposition_("disposition-"), s_content_location("content-location"), s_content_base("content-base"), s_content_boundary("content-boundary"), s_content_id("content-id"), s_content_description("content-description"), s_content_language("content-language"), s_content_md5("content-md5"), s_boundary("boundary"), s_to("to"), s_cc("cc"), s_mime_version("mime-version"), s_content_transfer_encoding("content-transfer-encoding"); Variant MimePart::getPartData() { Array ret = Array::CreateDict(); ret.set(s_headers, m_headers); ret.set(s_starting_pos, m_startpos); ret.set(s_starting_pos_body, m_bodystart); if (!m_parent) { ret.set(s_ending_pos, m_endpos); ret.set(s_ending_pos_body, m_bodyend); ret.set(s_line_count, m_nlines); ret.set(s_body_line_count, m_nbodylines); } else { 
ret.set(s_ending_pos, m_bodyend); ret.set(s_ending_pos_body, m_bodyend); ret.set(s_line_count, m_nlines ? m_nlines - 1 : m_nlines); ret.set(s_body_line_count, m_nbodylines ? m_nbodylines - 1 : m_nbodylines); } if (!m_charset.empty()) { ret.set(s_charset, m_charset); } else { ret.set(s_charset, "us-ascii"); } if (!m_content_transfer_encoding.empty()) { ret.set(s_transfer_encoding, m_content_transfer_encoding); } else { ret.set(s_transfer_encoding, "8bit"); } if (!m_content_type.empty()) { m_content_type.getAll(ret, s_content_type, s_content_); } else { ret.set(s_content_type, s_text_plain_error); } if (!m_content_disposition.empty()) { m_content_disposition.getAll(ret, s_content_disposition, s_disposition_); } if (!m_content_location.empty()) { ret.set(s_content_location, m_content_location); } if (!m_content_base.empty()) { ret.set(s_content_base, m_content_base); } else { ret.set(s_content_base, "/"); } if (!m_boundary.empty()) { ret.set(s_content_boundary, m_boundary); } /* extract the address part of the content-id only */ Variant contentId = m_headers[s_content_id]; if (!contentId.isNull()) { php_rfc822_tokenized_t *toks = php_mailparse_rfc822_tokenize((const char*)contentId.toString().data(), true); php_rfc822_addresses_t *addrs = php_rfc822_parse_address_tokens(toks); if (addrs->naddrs > 0) { ret.set(s_content_id, String(addrs->addrs[0].address, CopyString)); } php_rfc822_free_addresses(addrs); php_rfc822_tokenize_free(toks); } auto copyHeader = [&](const Variant& key) { if (m_headers.exists(key)) ret.set(key, m_headers[key]); }; copyHeader(s_content_description); copyHeader(s_content_language); copyHeader(s_content_md5); return ret; } bool MimePart::parse(const char *buf, int bufsize) { while (bufsize > 0) { /* look for EOL */ int len = 0; for (; len < bufsize; len++) { if (buf[len] == '\n') break; } if (len < bufsize && buf[len] == '\n') { ++len; m_parsedata.workbuf += String(buf, len, CopyString); if (!ProcessLine(req::ptr<MimePart>(this), 
m_parsedata.workbuf)) { // ProcessLine() only returns FAILURE in case the count of children // have exceeded MAXPARTS at the very beginning, without doing any work. // Short-circuit since the exceeded state won't change on subsequent // calls. return false; } m_parsedata.workbuf.clear(); } else { m_parsedata.workbuf += String(buf, len, CopyString); } buf += len; bufsize -= len; } return true; } req::ptr<MimePart> MimePart::createChild(int startpos, bool inherit) { auto child = req::make<MimePart>(); m_parsedata.lastpart = child; child->m_parent = this; m_children.append(Resource(child)); child->m_startpos = child->m_endpos = child->m_bodystart = child->m_bodyend = startpos; if (inherit) { child->m_content_transfer_encoding = m_content_transfer_encoding; child->m_charset = m_charset; } return child; } bool MimePart::processHeader() { if (m_parsedata.headerbuf.empty()) { return true; } /* parse the header line */ php_rfc822_tokenized_t *toks = php_mailparse_rfc822_tokenize(m_parsedata.headerbuf.data(), 0); /* valid headers consist of at least three tokens, with the first being a string and the second token being a ':' */ if (toks->ntokens < 2 || toks->tokens[0].token != 0 || toks->tokens[1].token != ':') { m_parsedata.headerbuf.clear(); php_rfc822_tokenize_free(toks); return false; } /* get a lower-case version of the first token */ String header_key(php_rfc822_recombine_tokens (toks, 0, 1, PHP_RFC822_RECOMBINE_IGNORE_COMMENTS| PHP_RFC822_RECOMBINE_STRTOLOWER), AttachString); const char *header_val = strchr(m_parsedata.headerbuf.data(), ':'); String header_val_stripped(php_rfc822_recombine_tokens (toks, 2, toks->ntokens-2, PHP_RFC822_RECOMBINE_IGNORE_COMMENTS| PHP_RFC822_RECOMBINE_STRTOLOWER), AttachString); if (header_val) { header_val++; while (isspace(*header_val)) { header_val++; } auto const header_arrkey = m_headers.convertKey<IntishCast::Cast>(header_key); /* add the header to the hash. 
* join multiple To: or Cc: lines together */ if ((header_key == s_to || header_key == s_cc) && m_headers.exists(header_arrkey)) { String newstr = m_headers[header_arrkey].toString(); newstr += ", "; newstr += header_val; m_headers.set(header_arrkey, make_tv<KindOfString>(newstr.get())); } else { if (m_headers.exists(header_arrkey)) { auto const zheaderval = m_headers.lval(header_arrkey); if (isArrayLikeType(zheaderval.type())) { asArrRef(zheaderval).append(String(header_val, CopyString)); } else { // Create a nested array if there is more than one of the same header Array zarr = Array::CreateVec(); zarr.append(zheaderval.tv()); zarr.append(String(header_val, CopyString)); m_headers.set(header_arrkey, make_array_like_tv(zarr.get())); } } else { String str(header_val, CopyString); m_headers.set(header_arrkey, make_tv<KindOfString>(str.get())); } } /* if it is useful, keep a pointer to it in the mime part */ if (header_key == s_mime_version) { m_mime_version = header_val_stripped; } else if (header_key == s_content_location) { m_content_location = String(php_rfc822_recombine_tokens (toks, 2, toks->ntokens-2, PHP_RFC822_RECOMBINE_IGNORE_COMMENTS), AttachString); } else if (header_key == s_content_base) { m_content_base = String(php_rfc822_recombine_tokens (toks, 2, toks->ntokens-2, PHP_RFC822_RECOMBINE_IGNORE_COMMENTS), AttachString); } else if (header_key == s_content_transfer_encoding) { m_content_transfer_encoding = header_val_stripped; } else if (header_key == s_content_type) { m_content_type = MimeHeader(toks); Variant boundary = m_content_type.get(s_boundary); if (!boundary.isNull()) { m_boundary = boundary.toString(); } Variant charset = m_content_type.get(s_charset); if (!charset.isNull()) { m_charset = charset.toString(); } } else if (header_key == s_content_disposition) { m_content_disposition = MimeHeader(toks); } } php_rfc822_tokenize_free(toks); m_parsedata.headerbuf.clear(); return true; } bool MimePart::ProcessLine(req::ptr<MimePart> workpart, const 
String& line) { /* sanity check */ if (workpart->m_children.size() > MAXPARTS) { raise_warning("MIME message too complex"); return false; } const char *c = line.data(); /* strip trailing \r\n -- we always have a trailing \n */ int origcount = line.size(); int linelen = origcount - 1; if (linelen && c[linelen-1] == '\r') { --linelen; } /* Discover which part we were last working on */ while (workpart->m_parsedata.lastpart) { auto lastpart = workpart->m_parsedata.lastpart; if (lastpart->m_parsedata.completed) { UpdatePositions(workpart, workpart->m_endpos + origcount, workpart->m_endpos + origcount, 1); return true; } if (workpart->m_boundary.empty() || workpart->m_parsedata.in_header) { workpart = lastpart; continue; } int bound_len = workpart->m_boundary.size(); /* Look for a boundary */ if (c[0] == '-' && c[1] == '-' && linelen >= 2+bound_len && strncasecmp(workpart->m_boundary.data(), c+2, bound_len) == 0) { /* is it the final boundary ? */ if (linelen >= 4 + bound_len && strncmp(c+2+bound_len, "--", 2) == 0) { lastpart->m_parsedata.completed = true; UpdatePositions(workpart, workpart->m_endpos + origcount, workpart->m_endpos + origcount, 1); return true; } auto newpart = workpart->createChild(workpart->m_endpos + origcount, true); UpdatePositions(workpart, workpart->m_endpos + origcount, workpart->m_endpos + linelen, 1); newpart->m_mime_version = workpart->m_mime_version; newpart->m_parsedata.in_header = true; return true; } workpart = lastpart; } if (!workpart->m_parsedata.in_header) { if (!workpart->m_parsedata.completed && !workpart->m_parsedata.lastpart) { /* update the body/part end positions. * For multipart messages, the final newline belongs to the boundary. 
* Otherwise it belongs to the body * */ if (workpart->m_parent && strncasecmp(workpart->getParent()->m_content_type.m_value.data(), "multipart/", 10) == 0) { UpdatePositions(workpart, workpart->m_endpos + origcount, workpart->m_endpos + linelen, true); } else { UpdatePositions(workpart, workpart->m_endpos + origcount, workpart->m_endpos + origcount, true); } } } else { if (linelen > 0) { UpdatePositions(workpart, workpart->m_endpos + origcount, workpart->m_endpos + linelen, true); if (*c == ' ' || *c == '\t') { /* This doesn't technically confirm to rfc2822, as we're replacing \t with \s, but this seems to fix cases where clients incorrectly fold by inserting a \t character. */ workpart->m_parsedata.headerbuf += " "; c++; linelen--; } else { workpart->processHeader(); } /* save header for possible continuation */ workpart->m_parsedata.headerbuf += String(c, linelen, CopyString); } else { /* end of headers */ workpart->processHeader(); /* start of body */ workpart->m_parsedata.in_header = false; workpart->m_bodystart = workpart->m_endpos + origcount; UpdatePositions(workpart, workpart->m_bodystart, workpart->m_bodystart, true); --workpart->m_nbodylines; /* some broken mailers include the content-type header but not a * mime-version header. 
* Let's relax and pretend they said they were mime 1.0 compatible */ if (workpart->m_mime_version.empty() && !workpart->m_content_type.empty()) { workpart->m_mime_version = "1.0"; } if (!workpart->isVersion1()) { /* if we don't understand the MIME version, discard the content-type and boundary */ workpart->m_content_disposition.clear(); workpart->m_boundary.clear(); workpart->m_content_type.clear(); workpart->m_content_type = MimeHeader("text/plain"); } /* if there is no content type, default to text/plain, but use multipart/digest when in a multipart/rfc822 message */ if (workpart->isVersion1() && workpart->m_content_type.empty()) { char *def_type = "text/plain"; if (workpart->m_parent && strcasecmp(workpart->getParent()->m_content_type.m_value.data(), "multipart/digest") == 0) { def_type = "message/rfc822"; } workpart->m_content_type = MimeHeader(def_type); } /* if no charset had previously been set, either through inheritance * or by an explicit content-type header, default to us-ascii */ if (workpart->m_charset.isNull()) { workpart->m_charset = "us-ascii"; } if (strcasecmp(workpart->m_content_type.m_value.data(), "message/rfc822") == 0) { workpart = workpart->createChild(workpart->m_bodystart, false); workpart->m_parsedata.in_header = true; return true; } /* create a section for the preamble that precedes the first boundary */ if (!workpart->m_boundary.empty()) { workpart = workpart->createChild(workpart->m_bodystart, true); workpart->m_parsedata.in_header = false; workpart->m_parsedata.is_dummy = true; return true; } return true; } } return true; } void MimePart::UpdatePositions(req::ptr<MimePart> part, int newendpos, int newbodyend, int deltanlines) { while (part) { part->m_endpos = newendpos; part->m_bodyend = newbodyend; part->m_nlines += deltanlines; if (!part->m_parsedata.in_header) { part->m_nbodylines += deltanlines; } part = part->m_parent; } } Variant MimePart::extract(const Variant& filename, const Variant& callbackfunc, int decode, bool isfile) { /* 
filename can be a filename or a stream */ req::ptr<File> file; if (filename.isResource()) { file = cast<File>(filename); } else if (isfile) { file = File::Open(filename.toString(), "rb"); } else { /* filename is the actual data */ String data = filename.toString(); file = req::make<MemFile>(data.data(), data.size()); } if (!file) { return false; } m_extract_context = callbackfunc; if (callbackfunc.isString() && callbackfunc.toString().empty()) { m_extract_func = &MimePart::outputToStdout; } else { if (callbackfunc.isNull()) { m_extract_func = &MimePart::outputToString; } else if (callbackfunc.isResource()) { m_extract_func = &MimePart::outputToFile; } else { m_extract_func = &MimePart::callUserFunc; } } if (extractImpl(decode, file)) { if (callbackfunc.isNull()) { return m_extract_context; } if (callbackfunc.isResource()) { return HHVM_FN(stream_get_contents)(callbackfunc.toResource()); } return true; } return init_null(); } int MimePart::extractImpl(int decode, req::ptr<File> src) { /* figure out where the message part starts/ends */ int start_pos = (decode & DecodeNoHeaders) ? m_bodystart : m_startpos; int end = (decode & DecodeNoBody) ? m_bodystart : (m_parent ? 
m_bodyend : m_endpos); decoderPrepare(decode & Decode8Bit); if (!src->seek(start_pos)) { raise_warning("unable to seek to section start"); decoderFinish(); return false; } while (start_pos < end) { int n = 4095; if (n > end - start_pos) n = end - start_pos; String str = src->read(n); if (str.empty()) { raise_warning("error reading from file at offset %d", start_pos); decoderFinish(); return false; } decoderFeed(str); start_pos += str.size(); } decoderFinish(); return true; } void MimePart::callUserFunc(const String& s) { vm_call_user_func(m_extract_context, make_vec_array(s)); } void MimePart::outputToStdout(const String& s) { g_context->write(s); } void MimePart::outputToFile(const String& s) { cast<File>(m_extract_context)->write(s); } void MimePart::outputToString(const String& s) { m_extract_context = m_extract_context.toString() + s; } /////////////////////////////////////////////////////////////////////////////// }

hphp/runtime/ext/mailparse/mime.cpp (762 lines of code) (raw):