hphp/runtime/ext/mbstring/ext_mbstring.cpp (3,950 lines of code) (raw):

/* +----------------------------------------------------------------------+ | HipHop for PHP | +----------------------------------------------------------------------+ | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) | | Copyright (c) 1997-2010 The PHP Group | +----------------------------------------------------------------------+ | This source file is subject to version 3.01 of the PHP license, | | that is bundled with this package in the file LICENSE, and is | | available through the world-wide-web at the following url: | | http://www.php.net/license/3_01.txt | | If you did not receive a copy of the PHP license and are unable to | | obtain it through the world-wide-web, please send a note to | | license@php.net so we can mail you a copy immediately. | +----------------------------------------------------------------------+ */ #include "hphp/runtime/ext/mbstring/ext_mbstring.h" #include "hphp/runtime/base/array-init.h" #include "hphp/runtime/base/array-iterator.h" #include "hphp/runtime/base/execution-context.h" #include "hphp/runtime/base/ini-setting.h" #include "hphp/runtime/base/request-event-handler.h" #include "hphp/runtime/base/string-buffer.h" #include "hphp/runtime/base/zend-string.h" #include "hphp/runtime/base/zend-url.h" #include "hphp/runtime/ext/mbstring/php_unicode.h" #include "hphp/runtime/ext/mbstring/unicode_data.h" #include "hphp/runtime/ext/std/ext_std_output.h" #include "hphp/runtime/ext/string/ext_string.h" #include "hphp/util/rds-local.h" #include <map> extern "C" { #include <mbfl/mbfl_convert.h> #include <mbfl/mbfilter.h> #include <mbfl/mbfilter_pass.h> #include <oniguruma.h> } #define php_mb_re_pattern_buffer re_pattern_buffer #define php_mb_regex_t regex_t #define php_mb_re_registers re_registers extern void mbfl_memory_device_unput(mbfl_memory_device *device); #define PARSE_POST 0 #define PARSE_GET 1 #define PARSE_COOKIE 2 #define PARSE_STRING 3 #define PARSE_ENV 4 #define PARSE_SERVER 5 #define PARSE_SESSION 6 
namespace HPHP {
///////////////////////////////////////////////////////////////////////////////
// statics

#define PHP_MBSTR_STACK_BLOCK_SIZE 32

/*
 * One row of the per-language table below: a language id, the encodings
 * listed for that language, and the number of entries in that list.
 */
typedef struct _php_mb_nls_ident_list {
  mbfl_no_language lang;
  mbfl_no_encoding* list;
  int list_size;
} php_mb_nls_ident_list;

/* Encodings listed for Japanese. */
static mbfl_no_encoding php_mb_default_identify_list_ja[] = {
  mbfl_no_encoding_ascii,
  mbfl_no_encoding_jis,
  mbfl_no_encoding_utf8,
  mbfl_no_encoding_euc_jp,
  mbfl_no_encoding_sjis
};

/* Encodings listed for Simplified Chinese. */
static mbfl_no_encoding php_mb_default_identify_list_cn[] = {
  mbfl_no_encoding_ascii,
  mbfl_no_encoding_utf8,
  mbfl_no_encoding_euc_cn,
  mbfl_no_encoding_cp936
};

/* Encodings listed for Traditional Chinese. */
static mbfl_no_encoding php_mb_default_identify_list_tw_hk[] = {
  mbfl_no_encoding_ascii,
  mbfl_no_encoding_utf8,
  mbfl_no_encoding_euc_tw,
  mbfl_no_encoding_big5
};

/* Encodings listed for Korean. */
static mbfl_no_encoding php_mb_default_identify_list_kr[] = {
  mbfl_no_encoding_ascii,
  mbfl_no_encoding_utf8,
  mbfl_no_encoding_euc_kr,
  mbfl_no_encoding_uhc
};

/* Encodings listed for Russian. */
static mbfl_no_encoding php_mb_default_identify_list_ru[] = {
  mbfl_no_encoding_ascii,
  mbfl_no_encoding_utf8,
  mbfl_no_encoding_koi8r,
  mbfl_no_encoding_cp1251,
  mbfl_no_encoding_cp866
};

/* Encodings listed for Armenian. */
static mbfl_no_encoding php_mb_default_identify_list_hy[] = {
  mbfl_no_encoding_ascii,
  mbfl_no_encoding_utf8,
  mbfl_no_encoding_armscii8
};

/* Encodings listed for Turkish. */
static mbfl_no_encoding php_mb_default_identify_list_tr[] = {
  mbfl_no_encoding_ascii,
  mbfl_no_encoding_utf8,
  mbfl_no_encoding_8859_9
};

/* Language-neutral list; also the fallback when no language row matches. */
static mbfl_no_encoding php_mb_default_identify_list_neut[] = {
  mbfl_no_encoding_ascii,
  mbfl_no_encoding_utf8
};

/*
 * Language -> encoding list table. Each row stores the element count of its
 * list, computed with the sizeof(array) / sizeof(array[0]) idiom.
 */
static php_mb_nls_ident_list php_mb_default_identify_list[] = {
  { mbfl_no_language_japanese, php_mb_default_identify_list_ja,
    sizeof(php_mb_default_identify_list_ja) /
    sizeof(php_mb_default_identify_list_ja[0]) },
  { mbfl_no_language_korean, php_mb_default_identify_list_kr,
    sizeof(php_mb_default_identify_list_kr) /
    sizeof(php_mb_default_identify_list_kr[0]) },
  { mbfl_no_language_traditional_chinese, php_mb_default_identify_list_tw_hk,
    sizeof(php_mb_default_identify_list_tw_hk) /
sizeof(php_mb_default_identify_list_tw_hk[0]) },
  { mbfl_no_language_simplified_chinese, php_mb_default_identify_list_cn,
    sizeof(php_mb_default_identify_list_cn) /
    sizeof(php_mb_default_identify_list_cn[0]) },
  { mbfl_no_language_russian, php_mb_default_identify_list_ru,
    sizeof(php_mb_default_identify_list_ru) /
    sizeof(php_mb_default_identify_list_ru[0]) },
  { mbfl_no_language_armenian, php_mb_default_identify_list_hy,
    sizeof(php_mb_default_identify_list_hy) /
    sizeof(php_mb_default_identify_list_hy[0]) },
  { mbfl_no_language_turkish, php_mb_default_identify_list_tr,
    sizeof(php_mb_default_identify_list_tr) /
    sizeof(php_mb_default_identify_list_tr[0]) },
  { mbfl_no_language_neutral, php_mb_default_identify_list_neut,
    sizeof(php_mb_default_identify_list_neut) /
    sizeof(php_mb_default_identify_list_neut[0]) }
};

///////////////////////////////////////////////////////////////////////////////
// globals

/* Compiled regular expressions keyed by string; entries are released with
 * onig_free() in MBGlobals::requestShutdown(). */
typedef std::map<std::string, php_mb_regex_t *> RegexCache;

/*
 * mbstring state. Fields come in pairs: the plain name holds the configured
 * value and the current_* twin is what requestInit() copies it into at the
 * start of each request.
 */
struct MBGlobals final : RequestEventHandler {
  mbfl_no_language language;
  mbfl_no_language current_language;
  mbfl_encoding *internal_encoding;
  mbfl_encoding *current_internal_encoding;
  mbfl_encoding *http_output_encoding;
  mbfl_encoding *current_http_output_encoding;
  /* Reset to nullptr by requestShutdown(). */
  mbfl_encoding *http_input_identify;
  mbfl_encoding *http_input_identify_get;
  mbfl_encoding *http_input_identify_post;
  mbfl_encoding *http_input_identify_cookie;
  mbfl_encoding *http_input_identify_string;
  mbfl_encoding **http_input_list;
  int http_input_list_size;
  /* Configured detect order; copied into current_detect_order_list when it
   * is non-empty. */
  mbfl_encoding **detect_order_list;
  int detect_order_list_size;
  /* Request-allocated copy, freed by requestShutdown(). */
  mbfl_encoding **current_detect_order_list;
  int current_detect_order_list_size;
  /* Used when detect_order_list is empty, and to expand "auto". */
  mbfl_no_encoding *default_detect_order_list;
  int default_detect_order_list_size;
  int filter_illegal_mode;
  int filter_illegal_substchar;
  int current_filter_illegal_mode;
  int current_filter_illegal_substchar;
  bool encoding_translation;
  long strict_detection;
  /* Running count of illegal characters reported by the converters. */
  long illegalchars;
  mbfl_buffer_converter *outconv;
  OnigEncoding default_mbctype;
  OnigEncoding
current_mbctype;
  /* Regex cache; emptied in requestShutdown(). */
  RegexCache ht_rc;
  /* Search state; search_str, search_pos and search_regs are reset in
   * requestShutdown(). */
  std::string search_str;
  unsigned int search_pos;
  php_mb_regex_t *search_re;
  OnigRegion *search_regs;
  OnigOptionType regex_default_options;
  OnigSyntaxType *regex_default_syntax;

  /*
   * Defaults: UTF-8 internal encoding, pass-through HTTP output encoding,
   * the neutral detect-order list, '?' as the substitute for illegal
   * characters, and UTF-8 / Ruby syntax for the regex engine.
   */
  MBGlobals() :
    language(mbfl_no_language_uni),
    current_language(mbfl_no_language_uni),
    internal_encoding((mbfl_encoding*) mbfl_no2encoding(mbfl_no_encoding_utf8)),
    current_internal_encoding(internal_encoding),
    http_output_encoding((mbfl_encoding*) &mbfl_encoding_pass),
    current_http_output_encoding((mbfl_encoding*) &mbfl_encoding_pass),
    http_input_identify(nullptr),
    http_input_identify_get(nullptr),
    http_input_identify_post(nullptr),
    http_input_identify_cookie(nullptr),
    http_input_identify_string(nullptr),
    http_input_list(nullptr),
    http_input_list_size(0),
    detect_order_list(nullptr),
    detect_order_list_size(0),
    current_detect_order_list(nullptr),
    current_detect_order_list_size(0),
    default_detect_order_list
    ((mbfl_no_encoding *)php_mb_default_identify_list_neut),
    default_detect_order_list_size
    (sizeof(php_mb_default_identify_list_neut) /
     sizeof(php_mb_default_identify_list_neut[0])),
    filter_illegal_mode(MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR),
    filter_illegal_substchar(0x3f), /* '?' */
    current_filter_illegal_mode(MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR),
    current_filter_illegal_substchar(0x3f), /* '?'
*/ encoding_translation(0), strict_detection(0), illegalchars(0), outconv(nullptr), default_mbctype(ONIG_ENCODING_UTF8), current_mbctype(ONIG_ENCODING_UTF8), search_pos(0), search_re((php_mb_regex_t*)nullptr), search_regs((OnigRegion*)nullptr), regex_default_options(ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE), regex_default_syntax(ONIG_SYNTAX_RUBY) { } void requestInit() override { current_language = language; current_internal_encoding = internal_encoding; current_http_output_encoding = http_output_encoding; current_filter_illegal_mode = filter_illegal_mode; current_filter_illegal_substchar = filter_illegal_substchar; if (!encoding_translation) { illegalchars = 0; } mbfl_encoding **entry = nullptr; int n = 0; if (current_detect_order_list) { return; } if (detect_order_list && detect_order_list_size > 0) { n = detect_order_list_size; entry = (mbfl_encoding **)req::malloc_noptrs(n * sizeof(mbfl_encoding*)); std::copy(detect_order_list, detect_order_list + (n * sizeof(mbfl_encoding*)), entry); } else { mbfl_no_encoding *src = default_detect_order_list; n = default_detect_order_list_size; entry = (mbfl_encoding **)req::malloc_noptrs(n * sizeof(mbfl_encoding*)); for (int i = 0; i < n; i++) { entry[i] = (mbfl_encoding*) mbfl_no2encoding(src[i]); } } current_detect_order_list = entry; current_detect_order_list_size = n; } void requestShutdown() override { if (current_detect_order_list != nullptr) { req::free(current_detect_order_list); current_detect_order_list = nullptr; current_detect_order_list_size = 0; } if (outconv != nullptr) { illegalchars += mbfl_buffer_illegalchars(outconv); mbfl_buffer_converter_delete(outconv); outconv = nullptr; } /* clear http input identification. 
*/ http_input_identify = nullptr; http_input_identify_post = nullptr; http_input_identify_get = nullptr; http_input_identify_cookie = nullptr; http_input_identify_string = nullptr; current_mbctype = default_mbctype; search_str.clear(); search_pos = 0; if (search_regs != nullptr) { onig_region_free(search_regs, 1); search_regs = (OnigRegion *)nullptr; } for (RegexCache::const_iterator it = ht_rc.begin(); it != ht_rc.end(); ++it) { onig_free(it->second); } ht_rc.clear(); } }; IMPLEMENT_STATIC_REQUEST_LOCAL(MBGlobals, s_mb_globals); #define MBSTRG(name) s_mb_globals->name /////////////////////////////////////////////////////////////////////////////// // unicode functions /* * A simple array of 32-bit masks for lookup. */ static unsigned long masks32[32] = { 0x00000001, 0x00000002, 0x00000004, 0x00000008, 0x00000010, 0x00000020, 0x00000040, 0x00000080, 0x00000100, 0x00000200, 0x00000400, 0x00000800, 0x00001000, 0x00002000, 0x00004000, 0x00008000, 0x00010000, 0x00020000, 0x00040000, 0x00080000, 0x00100000, 0x00200000, 0x00400000, 0x00800000, 0x01000000, 0x02000000, 0x04000000, 0x08000000, 0x10000000, 0x20000000, 0x40000000, 0x80000000 }; static int prop_lookup(unsigned long code, unsigned long n) { long l, r, m; /* * There is an extra node on the end of the offsets to allow this routine * to work right. If the index is 0xffff, then there are no nodes for the * property. */ if ((l = _ucprop_offsets[n]) == 0xffff) return 0; /* * Locate the next offset that is not 0xffff. The sentinel at the end of * the array is the max index value. */ for (m = 1; n + m < _ucprop_size && _ucprop_offsets[n + m] == 0xffff; m++) ; r = _ucprop_offsets[n + m] - 1; while (l <= r) { /* * Determine a "mid" point and adjust to make sure the mid point is at * the beginning of a range pair. 
*/ m = (l + r) >> 1; m -= (m & 1); if (code > _ucprop_ranges[m + 1]) l = m + 2; else if (code < _ucprop_ranges[m]) r = m - 2; else if (code >= _ucprop_ranges[m] && code <= _ucprop_ranges[m + 1]) return 1; } return 0; } static int php_unicode_is_prop(unsigned long code, unsigned long mask1, unsigned long mask2) { unsigned long i; if (mask1 == 0 && mask2 == 0) return 0; for (i = 0; mask1 && i < 32; i++) { if ((mask1 & masks32[i]) && prop_lookup(code, i)) return 1; } for (i = 32; mask2 && i < _ucprop_size; i++) { if ((mask2 & masks32[i & 31]) && prop_lookup(code, i)) return 1; } return 0; } static unsigned long case_lookup(unsigned long code, long l, long r, int field) { long m; /* * Do the binary search. */ while (l <= r) { /* * Determine a "mid" point and adjust to make sure the mid point is at * the beginning of a case mapping triple. */ m = (l + r) >> 1; m -= (m % 3); if (code > _uccase_map[m]) l = m + 3; else if (code < _uccase_map[m]) r = m - 3; else if (code == _uccase_map[m]) return _uccase_map[m + field]; } return code; } static unsigned long php_turkish_toupper(unsigned long code, long l, long r, int field) { if (code == 0x0069L) { return 0x0130L; } return case_lookup(code, l, r, field); } static unsigned long php_turkish_tolower(unsigned long code, long l, long r, int field) { if (code == 0x0049L) { return 0x0131L; } return case_lookup(code, l, r, field); } static unsigned long php_unicode_toupper(unsigned long code, enum mbfl_no_encoding enc) { int field; long l, r; if (php_unicode_is_upper(code)) return code; if (php_unicode_is_lower(code)) { /* * The character is lower case. */ field = 2; l = _uccase_len[0]; r = (l + _uccase_len[1]) - 3; if (enc == mbfl_no_encoding_8859_9) { return php_turkish_toupper(code, l, r, field); } } else { /* * The character is title case. 
*/ field = 1; l = _uccase_len[0] + _uccase_len[1]; r = _uccase_size - 3; } return case_lookup(code, l, r, field); } static unsigned long php_unicode_tolower(unsigned long code, enum mbfl_no_encoding enc) { int field; long l, r; if (php_unicode_is_lower(code)) return code; if (php_unicode_is_upper(code)) { /* * The character is upper case. */ field = 1; l = 0; r = _uccase_len[0] - 3; if (enc == mbfl_no_encoding_8859_9) { return php_turkish_tolower(code, l, r, field); } } else { /* * The character is title case. */ field = 2; l = _uccase_len[0] + _uccase_len[1]; r = _uccase_size - 3; } return case_lookup(code, l, r, field); } static unsigned long php_unicode_totitle(unsigned long code, enum mbfl_no_encoding /*enc*/) { int field; long l, r; if (php_unicode_is_title(code)) return code; /* * The offset will always be the same for converting to title case. */ field = 2; if (php_unicode_is_upper(code)) { /* * The character is upper case. */ l = 0; r = _uccase_len[0] - 3; } else { /* * The character is lower case. */ l = _uccase_len[0]; r = (l + _uccase_len[1]) - 3; } return case_lookup(code, l, r, field); } #define BE_ARY_TO_UINT32(ptr) (\ ((unsigned char*)(ptr))[0]<<24 |\ ((unsigned char*)(ptr))[1]<<16 |\ ((unsigned char*)(ptr))[2]<< 8 |\ ((unsigned char*)(ptr))[3] ) #define UINT32_TO_BE_ARY(ptr,val) { \ unsigned int v = val; \ ((unsigned char*)(ptr))[0] = (v>>24) & 0xff,\ ((unsigned char*)(ptr))[1] = (v>>16) & 0xff,\ ((unsigned char*)(ptr))[2] = (v>> 8) & 0xff,\ ((unsigned char*)(ptr))[3] = (v ) & 0xff;\ } /** * Return 0 if input contains any illegal encoding, otherwise 1. * Even if any illegal encoding is detected the result may contain a list * of parsed encodings. 
*/
/*
 * Parses a comma-separated list of encoding names (optionally wrapped in
 * double quotes) into a request-allocated array of mbfl_encoding pointers.
 * The name "auto" expands, once, to the default detect-order list.
 * On empty or null input both outputs are cleared and 0 is returned.
 * The last parameter is unnamed and unused.
 */
static int php_mb_parse_encoding_list(const char* value, int value_length,
                                      mbfl_encoding*** return_list,
                                      int* return_size, int /*persistent*/) {
  int n, l, size, bauto, ret = 1;
  char *p, *p1, *p2, *endp, *tmpstr;
  mbfl_encoding *encoding;
  mbfl_no_encoding *src;
  mbfl_encoding **entry, **list;

  list = nullptr;
  if (value == nullptr || value_length <= 0) {
    if (return_list) {
      *return_list = nullptr;
    }
    if (return_size) {
      *return_size = 0;
    }
    return 0;
  } else {
    mbfl_no_encoding *identify_list;
    int identify_list_size;

    identify_list = MBSTRG(default_detect_order_list);
    identify_list_size = MBSTRG(default_detect_order_list_size);

    /* copy the value string for work */
    /* (a surrounding pair of double quotes is stripped from the copy) */
    if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) {
      tmpstr = req::strndup(value + 1, value_length - 2);
    } else {
      tmpstr = req::strndup(value, value_length);
    }
    /* Re-measure: the copy stops at the first NUL byte, if any. */
    value_length = tmpstr ? strlen(tmpstr) : 0;
    if (!value_length) {
      req::free(tmpstr);
      if (return_list) {
        *return_list = nullptr;
      }
      if (return_size) {
        *return_size = 0;
      }
      return 0;
    }
    /* count the number of listed encoding names */
    endp = tmpstr + value_length;
    n = 1;
    p1 = tmpstr;
    while ((p2 = (char*)string_memnstr(p1, ",", 1, endp)) != nullptr) {
      p1 = p2 + 1;
      n++;
    }
    /* Room for every listed name plus one expansion of "auto". */
    size = n + identify_list_size;
    /* make list */
    list = (mbfl_encoding **)req::calloc_noptrs(size, sizeof(mbfl_encoding*));
    if (list != nullptr) {
      entry = list;
      n = 0;
      bauto = 0;
      p1 = tmpstr;
      do {
        /* p2 keeps the comma position (or nullptr); p marks the token end. */
        p2 = p = (char*)string_memnstr(p1, ",", 1, endp);
        if (p == nullptr) {
          p = endp;
        }
        /* Terminate the token in place so p1 is a C string. */
        *p = '\0';
        /* trim spaces */
        while (p1 < p && (*p1 == ' ' || *p1 == '\t')) {
          p1++;
        }
        p--;
        while (p > p1 && (*p == ' ' || *p == '\t')) {
          *p = '\0';
          p--;
        }
        /* convert to the encoding number and check encoding */
        if (strcasecmp(p1, "auto") == 0) {
          /* Only the first "auto" is expanded; later ones are ignored. */
          if (!bauto) {
            bauto = 1;
            l = identify_list_size;
            src = identify_list;
            for (int i = 0; i < l; i++) {
              *entry++ = (mbfl_encoding*) mbfl_no2encoding(*src++);
              n++;
            }
          }
        } else {
          encoding = (mbfl_encoding*) mbfl_name2encoding(p1);
          if (encoding != nullptr) {
            *entry++ = encoding;
n++;
          } else {
            /* Unknown name: remember the failure but keep parsing. */
            ret = 0;
          }
        }
        /* Only meaningful when p2 is non-null; the loop ends otherwise. */
        p1 = p2 + 1;
      } while (n < size && p2 != nullptr);
      if (n > 0) {
        /* Hand the list to the caller, or free it if nobody wants it. */
        if (return_list) {
          *return_list = list;
        } else {
          req::free(list);
        }
      } else {
        /* Nothing was recognised. */
        req::free(list);
        if (return_list) {
          *return_list = nullptr;
        }
        ret = 0;
      }
      if (return_size) {
        *return_size = n;
      }
    } else {
      /* Allocation failed. */
      if (return_list) {
        *return_list = nullptr;
      }
      if (return_size) {
        *return_size = 0;
      }
      ret = 0;
    }
    req::free(tmpstr);
  }

  return ret;
}

/*
 * Converts `input` (`length` bytes) to `_to_encoding`.
 *
 * _to_encoding    target encoding name; null or empty selects the current
 *                 internal encoding.
 * _from_encodings optional comma-separated source encodings; a single name
 *                 is used directly, several trigger detection.
 * output_len      if non-null, receives the result length (0 on failure).
 *
 * Returns the converted buffer produced by the mbfl converter, or nullptr
 * when input is null, the target encoding is unknown, or no converter could
 * be created.
 */
static char *php_mb_convert_encoding(const char *input, size_t length,
                                     const char *_to_encoding,
                                     const char *_from_encodings,
                                     unsigned int *output_len) {
  mbfl_string string, result, *ret;
  mbfl_encoding *from_encoding, *to_encoding;
  mbfl_buffer_converter *convd;
  int size;
  mbfl_encoding **list;
  char *output = nullptr;

  if (output_len) {
    *output_len = 0;
  }
  if (!input) {
    return nullptr;
  }
  /* new encoding */
  if (_to_encoding && strlen(_to_encoding)) {
    to_encoding = (mbfl_encoding*) mbfl_name2encoding(_to_encoding);
    if (to_encoding == nullptr) {
      raise_warning("Unknown encoding \"%s\"", _to_encoding);
      return nullptr;
    }
  } else {
    to_encoding = MBSTRG(current_internal_encoding);
  }

  /* initialize string */
  mbfl_string_init(&string);
  mbfl_string_init(&result);
  /* Source defaults to the current internal encoding. */
  from_encoding = MBSTRG(current_internal_encoding);
  string.no_encoding = from_encoding->no_encoding;
  string.no_language = MBSTRG(current_language);
  string.val = (unsigned char *)input;
  string.len = length;

  /* pre-conversion encoding */
  if (_from_encodings) {
    list = nullptr;
    size = 0;
    /* The return value is ignored; only the resulting size is examined. */
    php_mb_parse_encoding_list(_from_encodings, strlen(_from_encodings),
                               &list, &size, 0);
    if (size == 1) {
      from_encoding = *list;
      string.no_encoding = from_encoding->no_encoding;
    } else if (size > 1) {
      /* auto detect */
      from_encoding = (mbfl_encoding*) mbfl_identify_encoding2(&string,
                                         (const mbfl_encoding**) list,
                                         size, MBSTRG(strict_detection));
      if (from_encoding != nullptr) {
        string.no_encoding = from_encoding->no_encoding;
      } else {
        /* Detection failed: fall back to a pass-through conversion. */
        raise_warning("Unable to detect character encoding");
        from_encoding = (mbfl_encoding*)
&mbfl_encoding_pass;
        to_encoding = from_encoding;
        string.no_encoding = from_encoding->no_encoding;
      }
    } else {
      /* No usable name in the list; conversion continues with the
       * internal-encoding default chosen above. */
      raise_warning("Illegal character encoding specified");
    }
    if (list != nullptr) {
      req::free(list);
    }
  }

  /* initialize converter */
  convd = mbfl_buffer_converter_new2(from_encoding, to_encoding, string.len);
  if (convd == nullptr) {
    raise_warning("Unable to create character encoding converter");
    return nullptr;
  }
  mbfl_buffer_converter_illegal_mode
    (convd, MBSTRG(current_filter_illegal_mode));
  mbfl_buffer_converter_illegal_substchar
    (convd, MBSTRG(current_filter_illegal_substchar));

  /* do it */
  ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
  if (ret) {
    if (output_len) {
      *output_len = ret->len;
    }
    output = (char *)ret->val;
  }

  /* Accumulate the illegal-character count before deleting the converter. */
  MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
  mbfl_buffer_converter_delete(convd);
  return output;
}

/*
 * Case-converts `srcstr` (`srclen` bytes, in `src_encoding`) by converting
 * it to UCS-4BE, mapping each 4-byte unit in place and converting back.
 *
 * case_mode selects upper, lower or title casing; any other value leaves
 * the text unmapped. ret_len receives the result length. Returns nullptr
 * if the first conversion fails.
 */
static char *php_unicode_convert_case(int case_mode, const char *srcstr,
                                      size_t srclen, unsigned int *ret_len,
                                      const char *src_encoding) {
  char *unicode, *newstr;
  unsigned int unicode_len;
  unsigned char *unicode_ptr;
  size_t i;
  /* Passed to the case mappers, which special-case ISO-8859-9. */
  enum mbfl_no_encoding _src_encoding = mbfl_name2no_encoding(src_encoding);

  unicode = php_mb_convert_encoding(srcstr, srclen, "UCS-4BE", src_encoding,
                                    &unicode_len);
  if (unicode == nullptr)
    return nullptr;

  unicode_ptr = (unsigned char *)unicode;

  switch(case_mode) {
  case PHP_UNICODE_CASE_UPPER:
    /* One code point per 4-byte step. */
    for (i = 0; i < unicode_len; i+=4) {
      UINT32_TO_BE_ARY(&unicode_ptr[i],
        php_unicode_toupper(BE_ARY_TO_UINT32(&unicode_ptr[i]),
                            _src_encoding));
    }
    break;

  case PHP_UNICODE_CASE_LOWER:
    for (i = 0; i < unicode_len; i+=4) {
      UINT32_TO_BE_ARY(&unicode_ptr[i],
        php_unicode_tolower(BE_ARY_TO_UINT32(&unicode_ptr[i]),
                            _src_encoding));
    }
    break;

  case PHP_UNICODE_CASE_TITLE: {
    /* mode == 1 while inside a run of matching characters: the first one
     * of a run is title-cased, the following ones are lower-cased. */
    int mode = 0;

    for (i = 0; i < unicode_len; i+=4) {
      int res = php_unicode_is_prop
        (BE_ARY_TO_UINT32(&unicode_ptr[i]),
         UC_MN|UC_ME|UC_CF|UC_LM|UC_SK|UC_LU|UC_LL|UC_LT|UC_PO|UC_OS, 0);
      if (mode) {
        if (res) {
          UINT32_TO_BE_ARY
            (&unicode_ptr[i],
php_unicode_tolower(BE_ARY_TO_UINT32(&unicode_ptr[i]), _src_encoding)); } else { mode = 0; } } else { if (res) { mode = 1; UINT32_TO_BE_ARY (&unicode_ptr[i], php_unicode_totitle(BE_ARY_TO_UINT32(&unicode_ptr[i]), _src_encoding)); } } } } break; } newstr = php_mb_convert_encoding(unicode, unicode_len, src_encoding, "UCS-4BE", ret_len); free(unicode); return newstr; } /////////////////////////////////////////////////////////////////////////////// // helpers /** * Return 0 if input contains any illegal encoding, otherwise 1. * Even if any illegal encoding is detected the result may contain a list * of parsed encodings. */ static int php_mb_parse_encoding_array(const Array& array, mbfl_encoding*** return_list, int* return_size, int /*persistent*/) { int n, l, size, bauto,ret = 1; mbfl_encoding *encoding; mbfl_no_encoding *src; mbfl_encoding **list, **entry; list = nullptr; mbfl_no_encoding *identify_list = MBSTRG(default_detect_order_list); int identify_list_size = MBSTRG(default_detect_order_list_size); size = array.size() + identify_list_size; list = (mbfl_encoding **)req::calloc_noptrs(size, sizeof(mbfl_encoding*)); if (list != nullptr) { entry = list; bauto = 0; n = 0; for (ArrayIter iter(array); iter; ++iter) { auto const hash_entry = iter.second().toString(); if (strcasecmp(hash_entry.data(), "auto") == 0) { if (!bauto) { bauto = 1; l = identify_list_size; src = identify_list; for (int j = 0; j < l; j++) { *entry++ = (mbfl_encoding*) mbfl_no2encoding(*src++); n++; } } } else { encoding = (mbfl_encoding*) mbfl_name2encoding(hash_entry.data()); if (encoding != nullptr) { *entry++ = encoding; n++; } else { ret = 0; } } } if (n > 0) { if (return_list) { *return_list = list; } else { req::free(list); } } else { req::free(list); if (return_list) { *return_list = nullptr; } ret = 0; } if (return_size) { *return_size = n; } } else { if (return_list) { *return_list = nullptr; } if (return_size) { *return_size = 0; } ret = 0; } return ret; } static bool 
php_mb_parse_encoding(const Variant& encoding, mbfl_encoding ***return_list, int *return_size, bool persistent) { bool ret; if (encoding.isArray()) { ret = php_mb_parse_encoding_array(encoding.toArray(), return_list, return_size, persistent ? 1 : 0); } else { String enc = encoding.toString(); ret = php_mb_parse_encoding_list(enc.data(), enc.size(), return_list, return_size, persistent ? 1 : 0); } if (!ret) { if (return_list && *return_list) { req::free(*return_list); *return_list = nullptr; } return_size = 0; } return ret; } static int php_mb_nls_get_default_detect_order_list(mbfl_no_language lang, mbfl_no_encoding **plist, int* plist_size) { size_t i; *plist = (mbfl_no_encoding *) php_mb_default_identify_list_neut; *plist_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]); for (i = 0; i < sizeof(php_mb_default_identify_list) / sizeof(php_mb_default_identify_list[0]); i++) { if (php_mb_default_identify_list[i].lang == lang) { *plist = php_mb_default_identify_list[i].list; *plist_size = php_mb_default_identify_list[i].list_size; return 1; } } return 0; } static size_t php_mb_mbchar_bytes_ex(const char *s, const mbfl_encoding *enc) { if (enc != nullptr) { if (enc->flag & MBFL_ENCTYPE_MBCS) { if (enc->mblen_table != nullptr) { if (s != nullptr) return enc->mblen_table[*(unsigned char *)s]; } } else if (enc->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) { return 2; } else if (enc->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) { return 4; } } return 1; } static int php_mb_stripos(int mode, const char *old_haystack, int old_haystack_len, const char *old_needle, int old_needle_len, long offset, const char *from_encoding) { int n; mbfl_string haystack, needle; n = -1; mbfl_string_init(&haystack); mbfl_string_init(&needle); haystack.no_language = MBSTRG(current_language); haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; needle.no_language = MBSTRG(current_language); needle.no_encoding = 
MBSTRG(current_internal_encoding)->no_encoding;

  /* Single-pass do/while(0): `break` jumps to the shared cleanup below. */
  do {
    haystack.val = (unsigned char *)php_unicode_convert_case
      (PHP_UNICODE_CASE_UPPER, old_haystack, (size_t)old_haystack_len,
       &haystack.len, from_encoding);
    if (!haystack.val) {
      break;
    }
    /* NOTE(review): len is passed above as unsigned int*, so this is
     * effectively a test for an empty string. */
    if (haystack.len <= 0) {
      break;
    }

    needle.val = (unsigned char *)php_unicode_convert_case
      (PHP_UNICODE_CASE_UPPER, old_needle, (size_t)old_needle_len,
       &needle.len, from_encoding);
    if (!needle.val) {
      break;
    }
    if (needle.len <= 0) {
      break;
    }

    haystack.no_encoding = needle.no_encoding =
      mbfl_name2no_encoding(from_encoding);
    if (haystack.no_encoding == mbfl_no_encoding_invalid) {
      raise_warning("Unknown encoding \"%s\"", from_encoding);
      break;
    }

    int haystack_char_len = mbfl_strlen(&haystack);
    if (mode) {
      /* Non-zero mode accepts negative offsets, bounded by the length. */
      if ((offset > 0 && offset > haystack_char_len) ||
          (offset < 0 && -offset > haystack_char_len)) {
        raise_warning("Offset is greater than the length of haystack string");
        break;
      }
    } else {
      /* Zero mode requires 0 <= offset <= length. */
      if (offset < 0 || offset > haystack_char_len) {
        raise_warning("Offset not contained in string.");
        break;
      }
    }

    n = mbfl_strpos(&haystack, &needle, offset, mode);
  } while(0);

  /* Both buffers come from php_unicode_convert_case and are freed here. */
  if (haystack.val) {
    free(haystack.val);
  }
  if (needle.val) {
    free(needle.val);
  }
  return n;
}

///////////////////////////////////////////////////////////////////////////////

/* Maps a null Variant to null_string and anything else to its string form. */
static String convertArg(const Variant& arg) {
  return arg.isNull() ?
null_string : arg.toString(); } Array HHVM_FUNCTION(mb_list_encodings) { Array ret = Array::CreateVec(); int i = 0; const mbfl_encoding **encodings = mbfl_get_supported_encodings(); const mbfl_encoding *encoding; while ((encoding = encodings[i++]) != nullptr) { ret.append(String(encoding->name, CopyString)); } return ret; } Variant HHVM_FUNCTION(mb_encoding_aliases, const String& name) { const mbfl_encoding *encoding; int i = 0; encoding = mbfl_name2encoding(name.data()); if (!encoding) { raise_warning("mb_encoding_aliases(): Unknown encoding \"%s\"", name.data()); return false; } Array ret = Array::CreateVec(); if (encoding->aliases != nullptr) { while ((*encoding->aliases)[i] != nullptr) { ret.append((*encoding->aliases)[i]); i++; } } return ret; } Variant HHVM_FUNCTION(mb_list_encodings_alias_names, const Variant& opt_name) { const String name = convertArg(opt_name); const mbfl_encoding **encodings; const mbfl_encoding *encoding; mbfl_no_encoding no_encoding; int i, j; Array ret = Array::CreateDict(); if (name.isNull()) { i = 0; encodings = mbfl_get_supported_encodings(); while ((encoding = encodings[i++]) != nullptr) { Array row; if (encoding->aliases != nullptr) { row = Array::CreateVec(); j = 0; while ((*encoding->aliases)[j] != nullptr) { row.append(String((*encoding->aliases)[j], CopyString)); j++; } } ret.set(String(encoding->name, CopyString), row); } } else { no_encoding = mbfl_name2no_encoding(name.data()); if (no_encoding == mbfl_no_encoding_invalid) { raise_warning("Unknown encoding \"%s\"", name.data()); return false; } char *encodingName = (char *)mbfl_no_encoding2name(no_encoding); if (encodingName != nullptr) { i = 0; encodings = mbfl_get_supported_encodings(); while ((encoding = encodings[i++]) != nullptr) { if (strcmp(encoding->name, encodingName) != 0) continue; if (encoding->aliases != nullptr) { j = 0; while ((*encoding->aliases)[j] != nullptr) { ret.append(String((*encoding->aliases)[j], CopyString)); j++; } } break; } } else { return false; 
}
  }

  return ret.empty() ? init_null() : ret;
}

/*
 * Without a name: returns a dict mapping every supported encoding name to
 * its MIME name ("" when it has none). With a name: returns that encoding's
 * MIME name, "" when it has none, or false when the name cannot be
 * resolved. An empty dict becomes null.
 */
Variant HHVM_FUNCTION(mb_list_mime_names,
                      const Variant& opt_name) {
  const String name = convertArg(opt_name);

  if (!name.isNull()) {
    mbfl_no_encoding no_encoding = mbfl_name2no_encoding(name.data());
    if (no_encoding == mbfl_no_encoding_invalid) {
      raise_warning("Unknown encoding \"%s\"", name.data());
      return false;
    }

    char *encodingName = (char *)mbfl_no_encoding2name(no_encoding);
    if (encodingName == nullptr) {
      return false;
    }

    for (const mbfl_encoding **cursor = mbfl_get_supported_encodings();
         *cursor != nullptr; ++cursor) {
      const mbfl_encoding *encoding = *cursor;
      if (strcmp(encoding->name, encodingName) != 0) continue;

      if (encoding->mime_name != nullptr) {
        return String(encoding->mime_name, CopyString);
      }
      // Matching encoding without a MIME name: stop at the first match.
      break;
    }
    return empty_string_variant();
  }

  Array ret = Array::CreateDict();
  for (const mbfl_encoding **cursor = mbfl_get_supported_encodings();
       *cursor != nullptr; ++cursor) {
    const mbfl_encoding *encoding = *cursor;
    if (encoding->mime_name != nullptr) {
      ret.set(String(encoding->name, CopyString),
              String(encoding->mime_name, CopyString));
    } else {
      ret.set(String(encoding->name, CopyString), "");
    }
  }

  return ret.empty() ?
init_null() : ret; } bool HHVM_FUNCTION(mb_check_encoding, const Variant& opt_var, const Variant& opt_encoding) { const String var = convertArg(opt_var); const String encoding = convertArg(opt_encoding); mbfl_buffer_converter *convd; mbfl_no_encoding no_encoding = MBSTRG(current_internal_encoding)->no_encoding; mbfl_string string, result, *ret = nullptr; long illegalchars = 0; if (var.isNull()) { return MBSTRG(illegalchars) == 0; } if (!encoding.isNull()) { no_encoding = mbfl_name2no_encoding(encoding.data()); if (no_encoding == mbfl_no_encoding_invalid || no_encoding == mbfl_no_encoding_pass) { raise_warning("Invalid encoding \"%s\"", encoding.data()); return false; } } convd = mbfl_buffer_converter_new(no_encoding, no_encoding, 0); if (convd == nullptr) { raise_warning("Unable to create converter"); return false; } mbfl_buffer_converter_illegal_mode (convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE); mbfl_buffer_converter_illegal_substchar (convd, 0); /* initialize string */ mbfl_string_init_set(&string, mbfl_no_language_neutral, no_encoding); mbfl_string_init(&result); string.val = (unsigned char *)var.data(); string.len = var.size(); ret = mbfl_buffer_converter_feed_result(convd, &string, &result); illegalchars = mbfl_buffer_illegalchars(convd); mbfl_buffer_converter_delete(convd); if (ret != nullptr) { MBSTRG(illegalchars) += illegalchars; if (illegalchars == 0 && string.len == ret->len && memcmp((const char *)string.val, (const char *)ret->val, string.len) == 0) { mbfl_string_clear(&result); return true; } else { mbfl_string_clear(&result); return false; } } else { return false; } } Variant HHVM_FUNCTION(mb_convert_case, const String& str, int mode, const Variant& opt_encoding) { const String encoding = convertArg(opt_encoding); const char *enc = nullptr; if (encoding.empty()) { enc = MBSTRG(current_internal_encoding)->mime_name; } else { enc = encoding.data(); } unsigned int ret_len; char *newstr = php_unicode_convert_case(mode, str.data(), str.size(), &ret_len, 
enc); if (newstr) { return String(newstr, ret_len, AttachString); } return false; } Variant HHVM_FUNCTION(mb_convert_encoding, const String& str, const String& to_encoding, const Variant& from_encoding /* = uninit_variant */) { String encoding = from_encoding.toString(); if (from_encoding.isArray()) { StringBuffer _from_encodings; Array encs = from_encoding.toArray(); for (ArrayIter iter(encs); iter; ++iter) { if (!_from_encodings.empty()) { _from_encodings.append(","); } _from_encodings.append(iter.second().toString()); } encoding = _from_encodings.detach(); } unsigned int size; char *ret = php_mb_convert_encoding(str.data(), str.size(), to_encoding.data(), (!encoding.empty() ? encoding.data() : nullptr), &size); if (ret != nullptr) { return String(ret, size, AttachString); } return false; } Variant HHVM_FUNCTION(mb_convert_kana, const String& str, const Variant& opt_option, const Variant& opt_encoding) { const String option = convertArg(opt_option); const String encoding = convertArg(opt_encoding); mbfl_string string, result, *ret; mbfl_string_init(&string); string.no_language = MBSTRG(current_language); string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; string.val = (unsigned char *)str.data(); string.len = str.size(); int opt = 0x900; if (!option.empty()) { const char *p = option.data(); int n = option.size(); int i = 0; opt = 0; while (i < n) { i++; switch (*p++) { case 'A': opt |= 0x1; break; case 'a': opt |= 0x10; break; case 'R': opt |= 0x2; break; case 'r': opt |= 0x20; break; case 'N': opt |= 0x4; break; case 'n': opt |= 0x40; break; case 'S': opt |= 0x8; break; case 's': opt |= 0x80; break; case 'K': opt |= 0x100; break; case 'k': opt |= 0x1000; break; case 'H': opt |= 0x200; break; case 'h': opt |= 0x2000; break; case 'V': opt |= 0x800; break; case 'C': opt |= 0x10000; break; case 'c': opt |= 0x20000; break; case 'M': opt |= 0x100000; break; case 'm': opt |= 0x200000; break; } } } /* encoding */ if (!encoding.empty()) { 
string.no_encoding = mbfl_name2no_encoding(encoding.data()); if (string.no_encoding == mbfl_no_encoding_invalid) { raise_warning("Unknown encoding \"%s\"", encoding.data()); return false; } } ret = mbfl_ja_jp_hantozen(&string, &result, opt); if (ret != nullptr) { if (ret->len > StringData::MaxSize) { raise_warning("String too long, max is %d", StringData::MaxSize); return false; } return String(reinterpret_cast<char*>(ret->val), ret->len, AttachString); } return false; } static bool php_mbfl_encoding_detect(const Variant& var, mbfl_encoding_detector *identd, mbfl_string *string) { if (var.isArray() || var.is(KindOfObject)) { Array items = var.toArray(); for (ArrayIter iter(items); iter; ++iter) { if (php_mbfl_encoding_detect(iter.second(), identd, string)) { return true; } } } else if (var.isString()) { String svar = var.toString(); string->val = (unsigned char *)svar.data(); string->len = svar.size(); if (mbfl_encoding_detector_feed(identd, string)) { return true; } } return false; } static Variant php_mbfl_convert(const Variant& var, mbfl_buffer_converter *convd, mbfl_string *string, mbfl_string *result) { if (var.isArray()) { Array ret = empty_dict_array(); Array items = var.toArray(); for (ArrayIter iter(items); iter; ++iter) { ret.set(iter.first(), php_mbfl_convert(iter.second(), convd, string, result)); } return ret; } if (var.is(KindOfObject)) { Object obj = var.toObject(); Array items = var.toArray(); for (ArrayIter iter(items); iter; ++iter) { obj->o_set(iter.first().toString(), php_mbfl_convert(iter.second(), convd, string, result)); } return var; // which still has obj } if (var.isString()) { String svar = var.toString(); string->val = (unsigned char *)svar.data(); string->len = svar.size(); mbfl_string *ret = mbfl_buffer_converter_feed_result(convd, string, result); return String(reinterpret_cast<char*>(ret->val), ret->len, AttachString); } return var; } Variant HHVM_FUNCTION(mb_convert_variables, const String& to_encoding, const Variant& from_encoding, 
Variant& vars, const Array& args /* = null_array */) { mbfl_string string, result; mbfl_encoding *_from_encoding, *_to_encoding; mbfl_encoding_detector *identd; mbfl_buffer_converter *convd; int elistsz; mbfl_encoding **elist; char *name; /* new encoding */ _to_encoding = (mbfl_encoding*) mbfl_name2encoding(to_encoding.data()); if (_to_encoding == nullptr) { raise_warning("Unknown encoding \"%s\"", to_encoding.data()); return false; } /* initialize string */ mbfl_string_init(&string); mbfl_string_init(&result); _from_encoding = MBSTRG(current_internal_encoding); string.no_encoding = _from_encoding->no_encoding; string.no_language = MBSTRG(current_language); /* pre-conversion encoding */ elist = nullptr; elistsz = 0; php_mb_parse_encoding(from_encoding, &elist, &elistsz, false); if (elistsz <= 0) { _from_encoding = (mbfl_encoding*) &mbfl_encoding_pass; } else if (elistsz == 1) { _from_encoding = *elist; } else { /* auto detect */ _from_encoding = nullptr; identd = mbfl_encoding_detector_new2((const mbfl_encoding**) elist, elistsz, MBSTRG(strict_detection)); if (identd != nullptr) { for (int n = -1; n < args.size(); n++) { if (php_mbfl_encoding_detect(n < 0 ? 
vars : args[n], identd, &string)) { break; } } _from_encoding = (mbfl_encoding*) mbfl_encoding_detector_judge2(identd); mbfl_encoding_detector_delete(identd); } if (_from_encoding == nullptr) { raise_warning("Unable to detect encoding"); _from_encoding = (mbfl_encoding*) &mbfl_encoding_pass; } } if (elist != nullptr) { req::free(elist); } /* create converter */ convd = nullptr; if (_from_encoding != &mbfl_encoding_pass) { convd = mbfl_buffer_converter_new2(_from_encoding, _to_encoding, 0); if (convd == nullptr) { raise_warning("Unable to create converter"); return false; } mbfl_buffer_converter_illegal_mode (convd, MBSTRG(current_filter_illegal_mode)); mbfl_buffer_converter_illegal_substchar (convd, MBSTRG(current_filter_illegal_substchar)); } /* convert */ if (convd != nullptr) { vars = php_mbfl_convert(vars, convd, &string, &result); for (int n = 0; n < args.size(); n++) { const_cast<Array&>(args).set(n, php_mbfl_convert(args[n], convd, &string, &result)); } MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd); mbfl_buffer_converter_delete(convd); } if (_from_encoding != nullptr) { name = (char*) _from_encoding->name; if (name != nullptr) { return String(name, CopyString); } } return false; } Variant HHVM_FUNCTION(mb_decode_mimeheader, const String& str) { mbfl_string string, result, *ret; mbfl_string_init(&string); string.no_language = MBSTRG(current_language); string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; string.val = (unsigned char *)str.data(); string.len = str.size(); mbfl_string_init(&result); ret = mbfl_mime_header_decode(&string, &result, MBSTRG(current_internal_encoding)->no_encoding); if (ret != nullptr) { return String(reinterpret_cast<char*>(ret->val), ret->len, AttachString); } return false; } static Variant php_mb_numericentity_exec(const String& str, const Variant& convmap, const String& encoding, bool is_hex, int type) { int mapsize=0; mbfl_string string, result, *ret; mbfl_no_encoding no_encoding; 
mbfl_string_init(&string); string.no_language = MBSTRG(current_language); string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; string.val = (unsigned char *)str.data(); string.len = str.size(); if (type == 0 && is_hex) { type = 2; /* output in hex format */ } /* encoding */ if (!encoding.empty()) { no_encoding = mbfl_name2no_encoding(encoding.data()); if (no_encoding == mbfl_no_encoding_invalid) { raise_warning("Unknown encoding \"%s\"", encoding.data()); return false; } else { string.no_encoding = no_encoding; } } /* conversion map */ int *iconvmap = nullptr; if (convmap.isArray()) { Array convs = convmap.toArray(); mapsize = convs.size(); if (mapsize > 0) { iconvmap = (int*)req::malloc_noptrs(mapsize * sizeof(int)); int *mapelm = iconvmap; for (ArrayIter iter(convs); iter; ++iter) { *mapelm++ = iter.second().toInt32(); } } } if (iconvmap == nullptr) { return false; } mapsize /= 4; ret = mbfl_html_numeric_entity(&string, &result, iconvmap, mapsize, type); req::free(iconvmap); if (ret != nullptr) { if (ret->len > StringData::MaxSize) { raise_warning("String too long, max is %d", StringData::MaxSize); return false; } return String(reinterpret_cast<char*>(ret->val), ret->len, AttachString); } return false; } Variant HHVM_FUNCTION(mb_decode_numericentity, const String& str, const Variant& convmap, const Variant& opt_encoding) { const String encoding = convertArg(opt_encoding); return php_mb_numericentity_exec(str, convmap, encoding, false, 1); } Variant HHVM_FUNCTION(mb_detect_encoding, const String& str, const Variant& encoding_list /* = uninit_variant */, const Variant& strict /* = uninit_variant */) { mbfl_string string; mbfl_encoding *ret; mbfl_encoding **elist, **list; int size; /* make encoding list */ list = nullptr; size = 0; php_mb_parse_encoding(encoding_list, &list, &size, false); if (size > 0 && list != nullptr) { elist = list; } else { elist = MBSTRG(current_detect_order_list); size = MBSTRG(current_detect_order_list_size); } long nstrict 
= 0; if (!strict.isNull()) { nstrict = strict.toInt64(); } else { nstrict = MBSTRG(strict_detection); } mbfl_string_init(&string); string.no_language = MBSTRG(current_language); string.val = (unsigned char *)str.data(); string.len = str.size(); ret = (mbfl_encoding*) mbfl_identify_encoding2(&string, (const mbfl_encoding**) elist, size, nstrict); req::free(list); if (ret != nullptr) { return String(ret->name, CopyString); } return false; } Variant HHVM_FUNCTION(mb_detect_order, const Variant& encoding_list /* = uninit_variant */) { int n, size; mbfl_encoding **list, **entry; if (encoding_list.isNull()) { Array ret = Array::CreateVec(); entry = MBSTRG(current_detect_order_list); n = MBSTRG(current_detect_order_list_size); while (n > 0) { char *name = (char*) (*entry)->name; if (name) { ret.append(String(name, CopyString)); } entry++; n--; } return ret.empty() ? init_null() : ret; } list = nullptr; size = 0; if (!php_mb_parse_encoding(encoding_list, &list, &size, false) || list == nullptr) { return false; } if (MBSTRG(current_detect_order_list)) { req::free(MBSTRG(current_detect_order_list)); } MBSTRG(current_detect_order_list) = list; MBSTRG(current_detect_order_list_size) = size; return true; } Variant HHVM_FUNCTION(mb_encode_mimeheader, const String& str, const Variant& opt_charset, const Variant& opt_transfer_encoding, const String& linefeed /* = "\r\n" */, int indent /* = 0 */) { const String charset = convertArg(opt_charset); const String transfer_encoding = convertArg(opt_transfer_encoding); mbfl_no_encoding charsetenc, transenc; mbfl_string string, result, *ret; mbfl_string_init(&string); string.no_language = MBSTRG(current_language); string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; string.val = (unsigned char *)str.data(); string.len = str.size(); charsetenc = mbfl_no_encoding_pass; transenc = mbfl_no_encoding_base64; if (!charset.empty()) { charsetenc = mbfl_name2no_encoding(charset.data()); if (charsetenc == mbfl_no_encoding_invalid) { 
raise_warning("Unknown encoding \"%s\"", charset.data()); return false; } } else { const mbfl_language *lang = mbfl_no2language(MBSTRG(current_language)); if (lang != nullptr) { charsetenc = lang->mail_charset; transenc = lang->mail_header_encoding; } } if (!transfer_encoding.empty()) { char ch = *transfer_encoding.data(); if (ch == 'B' || ch == 'b') { transenc = mbfl_no_encoding_base64; } else if (ch == 'Q' || ch == 'q') { transenc = mbfl_no_encoding_qprint; } } mbfl_string_init(&result); ret = mbfl_mime_header_encode(&string, &result, charsetenc, transenc, linefeed.data(), indent); if (ret != nullptr) { if (ret->len > StringData::MaxSize) { raise_warning("String too long, max is %d", StringData::MaxSize); return false; } return String(reinterpret_cast<char*>(ret->val), ret->len, AttachString); } return false; } Variant HHVM_FUNCTION(mb_encode_numericentity, const String& str, const Variant& convmap, const Variant& opt_encoding /* = uninit_variant */, bool is_hex /* = false */) { const String encoding = convertArg(opt_encoding); return php_mb_numericentity_exec(str, convmap, encoding, is_hex, 0); } const StaticString s_internal_encoding("internal_encoding"), s_http_input("http_input"), s_http_output("http_output"), s_mail_charset("mail_charset"), s_mail_header_encoding("mail_header_encoding"), s_mail_body_encoding("mail_body_encoding"), s_illegal_chars("illegal_chars"), s_encoding_translation("encoding_translation"), s_On("On"), s_Off("Off"), s_language("language"), s_detect_order("detect_order"), s_substitute_character("substitute_character"), s_strict_detection("strict_detection"), s_none("none"), s_long("long"), s_entity("entity"); Variant HHVM_FUNCTION(mb_get_info, const Variant& opt_type) { const String type = convertArg(opt_type); const mbfl_language *lang = mbfl_no2language(MBSTRG(current_language)); mbfl_encoding **entry; int n; char *name; if (type.empty() || strcasecmp(type.data(), "all") == 0) { Array ret = Array::CreateDict(); if 
(MBSTRG(current_internal_encoding) != nullptr && (name = (char *) MBSTRG(current_internal_encoding)->name) != nullptr) { ret.set(s_internal_encoding, String(name, CopyString)); } if (MBSTRG(http_input_identify) != nullptr && (name = (char *)MBSTRG(http_input_identify)->name) != nullptr) { ret.set(s_http_input, String(name, CopyString)); } if (MBSTRG(current_http_output_encoding) != nullptr && (name = (char *)MBSTRG(current_http_output_encoding)->name) != nullptr) { ret.set(s_http_output, String(name, CopyString)); } if (lang != nullptr) { if ((name = (char *)mbfl_no_encoding2name (lang->mail_charset)) != nullptr) { ret.set(s_mail_charset, String(name, CopyString)); } if ((name = (char *)mbfl_no_encoding2name (lang->mail_header_encoding)) != nullptr) { ret.set(s_mail_header_encoding, String(name, CopyString)); } if ((name = (char *)mbfl_no_encoding2name (lang->mail_body_encoding)) != nullptr) { ret.set(s_mail_body_encoding, String(name, CopyString)); } } ret.set(s_illegal_chars, MBSTRG(illegalchars)); ret.set(s_encoding_translation, MBSTRG(encoding_translation) ? s_On : s_Off); if ((name = (char *)mbfl_no_language2name (MBSTRG(current_language))) != nullptr) { ret.set(s_language, String(name, CopyString)); } n = MBSTRG(current_detect_order_list_size); entry = MBSTRG(current_detect_order_list); if (n > 0) { Array row = Array::CreateVec(); while (n > 0) { if ((name = (char *)(*entry)->name) != nullptr) { row.append(String(name, CopyString)); } entry++; n--; } ret.set(s_detect_order, row.empty() ? 
init_null() : row); } switch (MBSTRG(current_filter_illegal_mode)) { case MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE: ret.set(s_substitute_character, s_none); break; case MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG: ret.set(s_substitute_character, s_long); break; case MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY: ret.set(s_substitute_character, s_entity); break; default: ret.set(s_substitute_character, MBSTRG(current_filter_illegal_substchar)); } ret.set(s_strict_detection, MBSTRG(strict_detection) ? s_On : s_Off); return ret; } else if (strcasecmp(type.data(), "internal_encoding") == 0) { if (MBSTRG(current_internal_encoding) != nullptr && (name = (char *)MBSTRG(current_internal_encoding)->name) != nullptr) { return String(name, CopyString); } } else if (strcasecmp(type.data(), "http_input") == 0) { if (MBSTRG(http_input_identify) != nullptr && (name = (char *)MBSTRG(http_input_identify)->name) != nullptr) { return String(name, CopyString); } } else if (strcasecmp(type.data(), "http_output") == 0) { if (MBSTRG(current_http_output_encoding) != nullptr && (name = (char *)MBSTRG(current_http_output_encoding)->name) != nullptr) { return String(name, CopyString); } } else if (strcasecmp(type.data(), "mail_charset") == 0) { if (lang != nullptr && (name = (char *)mbfl_no_encoding2name (lang->mail_charset)) != nullptr) { return String(name, CopyString); } } else if (strcasecmp(type.data(), "mail_header_encoding") == 0) { if (lang != nullptr && (name = (char *)mbfl_no_encoding2name (lang->mail_header_encoding)) != nullptr) { return String(name, CopyString); } } else if (strcasecmp(type.data(), "mail_body_encoding") == 0) { if (lang != nullptr && (name = (char *)mbfl_no_encoding2name (lang->mail_body_encoding)) != nullptr) { return String(name, CopyString); } } else if (strcasecmp(type.data(), "illegal_chars") == 0) { return MBSTRG(illegalchars); } else if (strcasecmp(type.data(), "encoding_translation") == 0) { return MBSTRG(encoding_translation) ? 
"On" : "Off"; } else if (strcasecmp(type.data(), "language") == 0) { if ((name = (char *)mbfl_no_language2name (MBSTRG(current_language))) != nullptr) { return String(name, CopyString); } } else if (strcasecmp(type.data(), "detect_order") == 0) { n = MBSTRG(current_detect_order_list_size); entry = MBSTRG(current_detect_order_list); if (n > 0) { Array ret = Array::CreateVec(); while (n > 0) { name = (char *)(*entry)->name; if (name) { ret.append(String(name, CopyString)); } entry++; n--; } } } else if (strcasecmp(type.data(), "substitute_character") == 0) { if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) { return s_none; } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) { return s_long; } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) { return s_entity; } else { return MBSTRG(current_filter_illegal_substchar); } } else if (strcasecmp(type.data(), "strict_detection") == 0) { return MBSTRG(strict_detection) ? s_On : s_Off; } return false; } Variant HHVM_FUNCTION(mb_http_input, const Variant& opt_type) { const String type = convertArg(opt_type); int n; char *name; mbfl_encoding **entry; mbfl_encoding *result = nullptr; if (type.empty()) { result = MBSTRG(http_input_identify); } else { switch (*type.data()) { case 'G': case 'g': result = MBSTRG(http_input_identify_get); break; case 'P': case 'p': result = MBSTRG(http_input_identify_post); break; case 'C': case 'c': result = MBSTRG(http_input_identify_cookie); break; case 'S': case 's': result = MBSTRG(http_input_identify_string); break; case 'I': case 'i': { Array ret = Array::CreateVec(); entry = MBSTRG(http_input_list); n = MBSTRG(http_input_list_size); while (n > 0) { name = (char *)(*entry)->name; if (name) { ret.append(String(name, CopyString)); } entry++; n--; } return ret.empty() ? 
init_null() : ret; } case 'L': case 'l': { entry = MBSTRG(http_input_list); n = MBSTRG(http_input_list_size); StringBuffer list; while (n > 0) { name = (char *)(*entry)->name; if (name) { if (list.empty()) { list.append(name); } else { list.append(','); list.append(name); } } entry++; n--; } if (list.empty()) { return false; } return list.detach(); } default: result = MBSTRG(http_input_identify); break; } } if (result != nullptr && (name = (char *)(result)->name) != nullptr) { return String(name, CopyString); } return false; } Variant HHVM_FUNCTION(mb_http_output, const Variant& opt_encoding) { const String encoding_name = convertArg(opt_encoding); if (encoding_name.empty()) { char *name = (char *)(MBSTRG(current_http_output_encoding)->name); if (name != nullptr) { return String(name, CopyString); } return false; } mbfl_encoding *encoding = (mbfl_encoding*) mbfl_name2encoding(encoding_name.data()); if (encoding == nullptr) { raise_warning("Unknown encoding \"%s\"", encoding_name.data()); return false; } MBSTRG(current_http_output_encoding) = encoding; return true; } Variant HHVM_FUNCTION(mb_internal_encoding, const Variant& opt_encoding) { const String encoding_name = convertArg(opt_encoding); if (encoding_name.empty()) { char *name = (char *)(MBSTRG(current_internal_encoding)->name); if (name != nullptr) { return String(name, CopyString); } return false; } mbfl_encoding *encoding = (mbfl_encoding*) mbfl_name2encoding(encoding_name.data()); if (encoding == nullptr) { raise_warning("Unknown encoding \"%s\"", encoding_name.data()); return false; } MBSTRG(current_internal_encoding) = encoding; return true; } Variant HHVM_FUNCTION(mb_language, const Variant& opt_language) { const String language = convertArg(opt_language); if (language.empty()) { return String(mbfl_no_language2name(MBSTRG(current_language)), CopyString); } mbfl_no_language no_language = mbfl_name2no_language(language.data()); if (no_language == mbfl_no_language_invalid) { raise_warning("Unknown 
language \"%s\"", language.data()); return false; } php_mb_nls_get_default_detect_order_list (no_language, &MBSTRG(default_detect_order_list), &MBSTRG(default_detect_order_list_size)); MBSTRG(current_language) = no_language; return true; } String HHVM_FUNCTION(mb_output_handler, const String& contents, int status) { mbfl_string string, result; int last_feed; mbfl_encoding *encoding = MBSTRG(current_http_output_encoding); /* start phase only */ if (status & k_PHP_OUTPUT_HANDLER_START) { /* delete the converter just in case. */ if (MBSTRG(outconv)) { MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv)); mbfl_buffer_converter_delete(MBSTRG(outconv)); MBSTRG(outconv) = nullptr; } if (encoding == nullptr) { return contents; } /* analyze mime type */ String mimetype = g_context->getMimeType(); if (!mimetype.empty()) { const char *charset = encoding->mime_name; if (charset) { g_context->setContentType(mimetype, charset); } /* activate the converter */ MBSTRG(outconv) = mbfl_buffer_converter_new2 (MBSTRG(current_internal_encoding), encoding, 0); } } /* just return if the converter is not activated. */ if (MBSTRG(outconv) == nullptr) { return contents; } /* flag */ last_feed = ((status & k_PHP_OUTPUT_HANDLER_END) != 0); /* mode */ mbfl_buffer_converter_illegal_mode (MBSTRG(outconv), MBSTRG(current_filter_illegal_mode)); mbfl_buffer_converter_illegal_substchar (MBSTRG(outconv), MBSTRG(current_filter_illegal_substchar)); /* feed the string */ mbfl_string_init(&string); string.no_language = MBSTRG(current_language); string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; string.val = (unsigned char *)contents.data(); string.len = contents.size(); mbfl_buffer_converter_feed(MBSTRG(outconv), &string); if (last_feed) { mbfl_buffer_converter_flush(MBSTRG(outconv)); } /* get the converter output, and return it */ mbfl_buffer_converter_result(MBSTRG(outconv), &result); /* delete the converter if it is the last feed. 
*/ if (last_feed) { MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv)); mbfl_buffer_converter_delete(MBSTRG(outconv)); MBSTRG(outconv) = nullptr; } return String(reinterpret_cast<char*>(result.val), result.len, AttachString); } typedef struct _php_mb_encoding_handler_info_t { int data_type; const char *separator; unsigned int force_register_globals: 1; unsigned int report_errors: 1; enum mbfl_no_language to_language; mbfl_encoding *to_encoding; enum mbfl_no_language from_language; int num_from_encodings; mbfl_encoding **from_encodings; } php_mb_encoding_handler_info_t; static mbfl_encoding* _php_mb_encoding_handler_ex (const php_mb_encoding_handler_info_t *info, Array& arg, char *res) { char *var, *val; const char *s1, *s2; char *strtok_buf = nullptr, **val_list = nullptr; int n, num, *len_list = nullptr; unsigned int val_len; mbfl_string string, resvar, resval; mbfl_encoding *from_encoding = nullptr; mbfl_encoding_detector *identd = nullptr; mbfl_buffer_converter *convd = nullptr; mbfl_string_init_set(&string, info->to_language, info->to_encoding->no_encoding); mbfl_string_init_set(&resvar, info->to_language, info->to_encoding->no_encoding); mbfl_string_init_set(&resval, info->to_language, info->to_encoding->no_encoding); if (!res || *res == '\0') { goto out; } /* count the variables(separators) contained in the "res". * separator may contain multiple separator chars. 
*/ num = 1; for (s1=res; *s1 != '\0'; s1++) { for (s2=info->separator; *s2 != '\0'; s2++) { if (*s1 == *s2) { num++; } } } num *= 2; /* need space for variable name and value */ val_list = (char **)req::calloc_noptrs(num, sizeof(char *)); len_list = (int *)req::calloc_noptrs(num, sizeof(int)); /* split and decode the query */ n = 0; strtok_buf = nullptr; var = strtok_r(res, info->separator, &strtok_buf); while (var) { val = strchr(var, '='); if (val) { /* have a value */ len_list[n] = url_decode_ex(var, val-var); val_list[n] = var; n++; *val++ = '\0'; val_list[n] = val; len_list[n] = url_decode_ex(val, strlen(val)); } else { len_list[n] = url_decode_ex(var, strlen(var)); val_list[n] = var; n++; val_list[n] = const_cast<char*>(""); len_list[n] = 0; } n++; var = strtok_r(nullptr, info->separator, &strtok_buf); } num = n; /* make sure to process initilized vars only */ /* initialize converter */ if (info->num_from_encodings <= 0) { from_encoding = (mbfl_encoding*) &mbfl_encoding_pass; } else if (info->num_from_encodings == 1) { from_encoding = info->from_encodings[0]; } else { /* auto detect */ from_encoding = nullptr; identd = mbfl_encoding_detector_new ((enum mbfl_no_encoding *)info->from_encodings, info->num_from_encodings, MBSTRG(strict_detection)); if (identd) { n = 0; while (n < num) { string.val = (unsigned char *)val_list[n]; string.len = len_list[n]; if (mbfl_encoding_detector_feed(identd, &string)) { break; } n++; } from_encoding = (mbfl_encoding*) mbfl_encoding_detector_judge2(identd); mbfl_encoding_detector_delete(identd); } if (from_encoding == nullptr) { if (info->report_errors) { raise_warning("Unable to detect encoding"); } from_encoding = (mbfl_encoding*) &mbfl_encoding_pass; } } convd = nullptr; if (from_encoding != (mbfl_encoding*) &mbfl_encoding_pass) { convd = mbfl_buffer_converter_new2(from_encoding, info->to_encoding, 0); if (convd != nullptr) { mbfl_buffer_converter_illegal_mode (convd, MBSTRG(current_filter_illegal_mode)); 
mbfl_buffer_converter_illegal_substchar (convd, MBSTRG(current_filter_illegal_substchar)); } else { if (info->report_errors) { raise_warning("Unable to create converter"); } goto out; } } /* convert encoding */ string.no_encoding = from_encoding->no_encoding; n = 0; while (n < num) { string.val = (unsigned char *)val_list[n]; string.len = len_list[n]; if (convd != nullptr && mbfl_buffer_converter_feed_result(convd, &string, &resvar) != nullptr) { var = (char *)resvar.val; } else { var = val_list[n]; } n++; string.val = (unsigned char *)val_list[n]; string.len = len_list[n]; if (convd != nullptr && mbfl_buffer_converter_feed_result(convd, &string, &resval) != nullptr) { val = (char *)resval.val; val_len = resval.len; } else { val = val_list[n]; val_len = len_list[n]; } n++; if (val_len > 0) { arg.set(String(var, CopyString), String(val, val_len, CopyString)); } if (convd != nullptr) { mbfl_string_clear(&resvar); mbfl_string_clear(&resval); } } out: if (convd != nullptr) { MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd); mbfl_buffer_converter_delete(convd); } if (val_list != nullptr) { req::free((void *)val_list); } if (len_list != nullptr) { req::free((void *)len_list); } return from_encoding; } bool HHVM_FUNCTION(mb_parse_str, const String& encoded_string, Array& result) { php_mb_encoding_handler_info_t info; info.data_type = PARSE_STRING; info.separator = "&"; info.force_register_globals = false; info.report_errors = 1; info.to_encoding = MBSTRG(current_internal_encoding); info.to_language = MBSTRG(current_language); info.from_encodings = MBSTRG(http_input_list); info.num_from_encodings = MBSTRG(http_input_list_size); info.from_language = MBSTRG(current_language); char *encstr = req::strndup(encoded_string.data(), encoded_string.size()); result = Array::CreateDict(); mbfl_encoding *detected = _php_mb_encoding_handler_ex(&info, result, encstr); req::free(encstr); MBSTRG(http_input_identify) = detected; return detected != nullptr; } Variant 
HHVM_FUNCTION(mb_preferred_mime_name,
                      const String& encoding) {
  /* Returns the preferred MIME name for `encoding`; warns and returns false
   * when the encoding is unknown or has no (non-empty) preferred name. */
  mbfl_no_encoding no_encoding = mbfl_name2no_encoding(encoding.data());
  if (no_encoding == mbfl_no_encoding_invalid) {
    raise_warning("Unknown encoding \"%s\"", encoding.data());
    return false;
  }

  const char *preferred_name = mbfl_no2preferred_mime_name(no_encoding);
  if (preferred_name == nullptr || *preferred_name == '\0') {
    raise_warning("No MIME preferred name corresponding to \"%s\"",
                  encoding.data());
    return false;
  }

  return String(preferred_name, CopyString);
}

/**
 * Shared implementation of mb_substr (substr == true, via mbfl_substr) and
 * mb_strcut (substr == false, via mbfl_strcut).
 *
 * `from` and `vlen` may be negative, in which case they are resolved against
 * the string size: the mbfl_strlen() result for mb_substr, the byte size of
 * `str` for mb_strcut. A null `vlen`, or one equal to 0x7FFFFFFF, means
 * "to the end of the string".
 *
 * Returns the extracted string, or false for an unknown encoding, for a
 * mb_strcut start beyond the string size, or when the library call returns
 * no result.
 */
static Variant php_mb_substr(const String& str, int from,
                             const Variant& vlen,
                             const String& encoding, bool substr) {
  mbfl_string string;
  mbfl_string_init(&string);
  string.no_language = MBSTRG(current_language);
  string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
  string.val = (unsigned char *)str.data();
  string.len = str.size();

  if (!encoding.empty()) {
    string.no_encoding = mbfl_name2no_encoding(encoding.data());
    if (string.no_encoding == mbfl_no_encoding_invalid) {
      raise_warning("Unknown encoding \"%s\"", encoding.data());
      return false;
    }
  }

  int len = vlen.toInt64();
  int size = 0;

  if (substr) {
    // mbfl_strlen() is only called when its value is needed, and at most
    // once; size_tmp caches it.
    int size_tmp = -1;
    if (vlen.isNull() || len == 0x7FFFFFFF) {
      size_tmp = mbfl_strlen(&string);
      len = size_tmp;
    }
    if (from < 0 || len < 0) {
      size = size_tmp < 0 ? mbfl_strlen(&string) : size_tmp;
    }
  } else {
    size = str.size();
    if (vlen.isNull() || len == 0x7FFFFFFF) {
      len = size;
    }
  }

  /* if "from" position is negative, count start position from the end
   * of the string
   */
  if (from < 0) {
    from = size + from;
    if (from < 0) {
      from = 0;
    }
  }

  /* if "length" position is negative, set it to the length
   * needed to stop that many chars from the end of the string
   */
  if (len < 0) {
    len = (size - from) + len;
    if (len < 0) {
      len = 0;
    }
  }

  if (!substr && from > size) {
    return false;
  }

  mbfl_string result;
  mbfl_string *ret;
  if (substr) {
    ret = mbfl_substr(&string, &result, from, len);
  } else {
    ret = mbfl_strcut(&string, &result, from, len);
  }
  if (ret != nullptr) {
    return String(reinterpret_cast<char*>(ret->val), ret->len, AttachString);
  }
  return false;
}

/** Extracts part of `str`; see php_mb_substr() (substr mode). */
Variant HHVM_FUNCTION(mb_substr,
                      const String& str,
                      int start,
                      const Variant& length /*= uninit_null() */,
                      const Variant& opt_encoding) {
  const String encoding = convertArg(opt_encoding);
  return php_mb_substr(str, start, length, encoding, true);
}

/** Extracts part of `str`; see php_mb_substr() (strcut mode). */
Variant HHVM_FUNCTION(mb_strcut,
                      const String& str,
                      int start,
                      const Variant& length /*= uninit_null() */,
                      const Variant& opt_encoding) {
  const String encoding = convertArg(opt_encoding);
  return php_mb_substr(str, start, length, encoding, false);
}

/**
 * Truncates `str` to `width` starting at `start` via mbfl_strimwidth(),
 * passing `opt_trimmarker` as the marker string.
 *
 * Warns and returns false for an unknown encoding, a `start` that is
 * negative or greater than the byte size of `str`, or a negative `width`.
 * Also returns false when the library call returns no result.
 */
Variant HHVM_FUNCTION(mb_strimwidth,
                      const String& str,
                      int start,
                      int width,
                      const Variant& opt_trimmarker,
                      const Variant& opt_encoding) {
  const String trimmarker = convertArg(opt_trimmarker);
  const String encoding = convertArg(opt_encoding);

  mbfl_string string, result, marker, *ret;
  mbfl_string_init(&string);
  mbfl_string_init(&marker);
  string.no_language = MBSTRG(current_language);
  string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
  marker.no_language = MBSTRG(current_language);
  marker.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
  marker.val = nullptr;
  marker.len = 0;

  if (!encoding.empty()) {
    string.no_encoding = marker.no_encoding =
      mbfl_name2no_encoding(encoding.data());
    if (string.no_encoding == mbfl_no_encoding_invalid) {
      raise_warning("Unknown encoding \"%s\"", encoding.data());
      return false;
    }
  }

  string.val = (unsigned char *)str.data();
  string.len = str.size();

  if (start < 0 || start > str.size()) {
    raise_warning("Start position is out of range");
    return false;
  }

  if (width < 0) {
    raise_warning("Width is negative value");
    return false;
  }

  marker.val = (unsigned char *)trimmarker.data();
  marker.len = trimmarker.size();

  ret = mbfl_strimwidth(&string, &marker, &result, start, width);
  if (ret != nullptr) {
    return String(reinterpret_cast<char*>(ret->val), ret->len, AttachString);
  }
  return false;
}

/**
 * Case-insensitive search for the first occurrence of `needle` in
 * `haystack` through php_mb_stripos() (mode 0). An empty needle raises a
 * warning. Returns the position, or false when the helper reports a
 * negative result.
 */
Variant HHVM_FUNCTION(mb_stripos,
                      const String& haystack,
                      const String& needle,
                      int offset /* = 0 */,
                      const Variant& opt_encoding) {
  const String encoding = convertArg(opt_encoding);

  const char *from_encoding;
  if (encoding.empty()) {
    from_encoding = MBSTRG(current_internal_encoding)->mime_name;
  } else {
    from_encoding = encoding.data();
  }

  if (needle.empty()) {
    raise_warning("Empty delimiter");
    return false;
  }

  int n = php_mb_stripos(0, haystack.data(), haystack.size(),
                         needle.data(), needle.size(), offset, from_encoding);
  if (n >= 0) {
    return n;
  }
  return false;
}

/**
 * Case-insensitive search for the last occurrence of `needle` in `haystack`
 * through php_mb_stripos() (mode 1). Returns the position, or false when the
 * helper reports a negative result.
 */
Variant HHVM_FUNCTION(mb_strripos,
                      const String& haystack,
                      const String& needle,
                      int offset /* = 0 */,
                      const Variant& opt_encoding) {
  const String encoding = convertArg(opt_encoding);

  const char *from_encoding;
  if (encoding.empty()) {
    from_encoding = MBSTRG(current_internal_encoding)->mime_name;
  } else {
    from_encoding = encoding.data();
  }

  int n = php_mb_stripos(1, haystack.data(), haystack.size(),
                         needle.data(), needle.size(), offset, from_encoding);
  if (n >= 0) {
    return n;
  }
  return false;
}

Variant HHVM_FUNCTION(mb_stristr,
                      const String& haystack,
                      const String& needle,
                      bool part /* = false */,
                      const Variant& opt_encoding) {
  const String encoding = convertArg(opt_encoding);

  mbfl_string mbs_haystack;
  mbfl_string_init(&mbs_haystack);
  mbs_haystack.no_language = MBSTRG(current_language);
mbs_haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; mbs_haystack.val = (unsigned char *)haystack.data(); mbs_haystack.len = haystack.size(); mbfl_string mbs_needle; mbfl_string_init(&mbs_needle); mbs_needle.no_language = MBSTRG(current_language); mbs_needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; mbs_needle.val = (unsigned char *)needle.data(); mbs_needle.len = needle.size(); if (!mbs_needle.len) { raise_warning("Empty delimiter."); return false; } const char *from_encoding; if (encoding.empty()) { from_encoding = MBSTRG(current_internal_encoding)->mime_name; } else { from_encoding = encoding.data(); } mbs_haystack.no_encoding = mbs_needle.no_encoding = mbfl_name2no_encoding(from_encoding); if (mbs_haystack.no_encoding == mbfl_no_encoding_invalid) { raise_warning("Unknown encoding \"%s\"", from_encoding); return false; } int n = php_mb_stripos(0, (const char*)mbs_haystack.val, mbs_haystack.len, (const char *)mbs_needle.val, mbs_needle.len, 0, from_encoding); if (n < 0) { return false; } int mblen = mbfl_strlen(&mbs_haystack); mbfl_string result, *ret = nullptr; if (part) { ret = mbfl_substr(&mbs_haystack, &result, 0, n); } else { int len = (mblen - n); ret = mbfl_substr(&mbs_haystack, &result, n, len); } if (ret != nullptr) { return String(reinterpret_cast<char*>(ret->val), ret->len, AttachString); } return false; } Variant HHVM_FUNCTION(mb_strlen, const String& str, const Variant& opt_encoding) { const String encoding = convertArg(opt_encoding); mbfl_string string; mbfl_string_init(&string); string.val = (unsigned char *)str.data(); string.len = str.size(); string.no_language = MBSTRG(current_language); if (encoding.empty()) { string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; } else { string.no_encoding = mbfl_name2no_encoding(encoding.data()); if (string.no_encoding == mbfl_no_encoding_invalid) { raise_warning("Unknown encoding \"%s\"", encoding.data()); return false; } } int n = 
mbfl_strlen(&string); if (n >= 0) { return n; } return false; } Variant HHVM_FUNCTION(mb_strpos, const String& haystack, const String& needle, int offset /* = 0 */, const Variant& opt_encoding) { const String encoding = convertArg(opt_encoding); mbfl_string mbs_haystack; mbfl_string_init(&mbs_haystack); mbs_haystack.no_language = MBSTRG(current_language); mbs_haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; mbs_haystack.val = (unsigned char *)haystack.data(); mbs_haystack.len = haystack.size(); mbfl_string mbs_needle; mbfl_string_init(&mbs_needle); mbs_needle.no_language = MBSTRG(current_language); mbs_needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; mbs_needle.val = (unsigned char *)needle.data(); mbs_needle.len = needle.size(); if (!encoding.empty()) { mbs_haystack.no_encoding = mbs_needle.no_encoding = mbfl_name2no_encoding(encoding.data()); if (mbs_haystack.no_encoding == mbfl_no_encoding_invalid) { raise_warning("Unknown encoding \"%s\"", encoding.data()); return false; } } if (offset < 0 || offset > mbfl_strlen(&mbs_haystack)) { raise_warning("Offset not contained in string."); return false; } if (mbs_needle.len == 0) { raise_warning("Empty delimiter."); return false; } int reverse = 0; int n = mbfl_strpos(&mbs_haystack, &mbs_needle, offset, reverse); if (n >= 0) { return n; } switch (-n) { case 1: break; case 2: raise_warning("Needle has not positive length."); break; case 4: raise_warning("Unknown encoding or conversion error."); break; case 8: raise_warning("Argument is empty."); break; default: raise_warning("Unknown error in mb_strpos."); break; } return false; } Variant HHVM_FUNCTION(mb_strrpos, const String& haystack, const String& needle, const Variant& offset /* = 0LL */, const Variant& opt_encoding) { const String encoding = convertArg(opt_encoding); mbfl_string mbs_haystack; mbfl_string_init(&mbs_haystack); mbs_haystack.no_language = MBSTRG(current_language); mbs_haystack.no_encoding = 
MBSTRG(current_internal_encoding)->no_encoding; mbs_haystack.val = (unsigned char *)haystack.data(); mbs_haystack.len = haystack.size(); mbfl_string mbs_needle; mbfl_string_init(&mbs_needle); mbs_needle.no_language = MBSTRG(current_language); mbs_needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; mbs_needle.val = (unsigned char *)needle.data(); mbs_needle.len = needle.size(); // This hack is so that if the caller puts the encoding in the offset field we // attempt to detect it and use that as the encoding. Ick. const char *enc_name = encoding.data(); long noffset = 0; String soffset = offset.toString(); if (offset.isString()) { enc_name = soffset.data(); int str_flg = 1; if (enc_name != nullptr) { switch (*enc_name) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case ' ': case '-': case '.': break; default : str_flg = 0; break; } } if (str_flg) { noffset = offset.toInt32(); enc_name = encoding.data(); } } else { noffset = offset.toInt32(); } if (enc_name != nullptr && *enc_name) { mbs_haystack.no_encoding = mbs_needle.no_encoding = mbfl_name2no_encoding(enc_name); if (mbs_haystack.no_encoding == mbfl_no_encoding_invalid) { raise_warning("Unknown encoding \"%s\"", enc_name); return false; } } if (mbs_haystack.len <= 0) { return false; } if (mbs_needle.len <= 0) { return false; } if ((noffset > 0 && noffset > mbfl_strlen(&mbs_haystack)) || (noffset < 0 && -noffset > mbfl_strlen(&mbs_haystack))) { raise_notice("Offset is greater than the length of haystack string"); return false; } int n = mbfl_strpos(&mbs_haystack, &mbs_needle, noffset, 1); if (n >= 0) { return n; } return false; } Variant HHVM_FUNCTION(mb_strrchr, const String& haystack, const String& needle, bool part /* = false */, const Variant& opt_encoding) { const String encoding = convertArg(opt_encoding); mbfl_string mbs_haystack; mbfl_string_init(&mbs_haystack); mbs_haystack.no_language = MBSTRG(current_language); 
mbs_haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; mbs_haystack.val = (unsigned char *)haystack.data(); mbs_haystack.len = haystack.size(); mbfl_string mbs_needle; mbfl_string_init(&mbs_needle); mbs_needle.no_language = MBSTRG(current_language); mbs_needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; mbs_needle.val = (unsigned char *)needle.data(); mbs_needle.len = needle.size(); if (!encoding.empty()) { mbs_haystack.no_encoding = mbs_needle.no_encoding = mbfl_name2no_encoding(encoding.data()); if (mbs_haystack.no_encoding == mbfl_no_encoding_invalid) { raise_warning("Unknown encoding \"%s\"", encoding.data()); return false; } } if (mbs_haystack.len <= 0) { return false; } if (mbs_needle.len <= 0) { return false; } mbfl_string result, *ret = nullptr; int n = mbfl_strpos(&mbs_haystack, &mbs_needle, 0, 1); if (n >= 0) { int mblen = mbfl_strlen(&mbs_haystack); if (part) { ret = mbfl_substr(&mbs_haystack, &result, 0, n); } else { int len = (mblen - n); ret = mbfl_substr(&mbs_haystack, &result, n, len); } } if (ret != nullptr) { return String(reinterpret_cast<char*>(ret->val), ret->len, AttachString); } return false; } Variant HHVM_FUNCTION(mb_strrichr, const String& haystack, const String& needle, bool part /* = false */, const Variant& opt_encoding) { const String encoding = convertArg(opt_encoding); mbfl_string mbs_haystack; mbfl_string_init(&mbs_haystack); mbs_haystack.no_language = MBSTRG(current_language); mbs_haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; mbs_haystack.val = (unsigned char *)haystack.data(); mbs_haystack.len = haystack.size(); mbfl_string mbs_needle; mbfl_string_init(&mbs_needle); mbs_needle.no_language = MBSTRG(current_language); mbs_needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; mbs_needle.val = (unsigned char *)needle.data(); mbs_needle.len = needle.size(); const char *from_encoding; if (encoding.empty()) { from_encoding = 
MBSTRG(current_internal_encoding)->mime_name; } else { from_encoding = encoding.data(); } mbs_haystack.no_encoding = mbs_needle.no_encoding = mbfl_name2no_encoding(from_encoding); if (mbs_haystack.no_encoding == mbfl_no_encoding_invalid) { raise_warning("Unknown encoding \"%s\"", from_encoding); return false; } int n = php_mb_stripos(1, (const char*)mbs_haystack.val, mbs_haystack.len, (const char*)mbs_needle.val, mbs_needle.len, 0, from_encoding); if (n < 0) { return false; } mbfl_string result, *ret = nullptr; int mblen = mbfl_strlen(&mbs_haystack); if (part) { ret = mbfl_substr(&mbs_haystack, &result, 0, n); } else { int len = (mblen - n); ret = mbfl_substr(&mbs_haystack, &result, n, len); } if (ret != nullptr) { return String(reinterpret_cast<char*>(ret->val), ret->len, AttachString); } return false; } Variant HHVM_FUNCTION(mb_strstr, const String& haystack, const String& needle, bool part /* = false */, const Variant& opt_encoding) { const String encoding = convertArg(opt_encoding); mbfl_string mbs_haystack; mbfl_string_init(&mbs_haystack); mbs_haystack.no_language = MBSTRG(current_language); mbs_haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; mbs_haystack.val = (unsigned char *)haystack.data(); mbs_haystack.len = haystack.size(); mbfl_string mbs_needle; mbfl_string_init(&mbs_needle); mbs_needle.no_language = MBSTRG(current_language); mbs_needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; mbs_needle.val = (unsigned char *)needle.data(); mbs_needle.len = needle.size(); if (!encoding.empty()) { mbs_haystack.no_encoding = mbs_needle.no_encoding = mbfl_name2no_encoding(encoding.data()); if (mbs_haystack.no_encoding == mbfl_no_encoding_invalid) { raise_warning("Unknown encoding \"%s\"", encoding.data()); return false; } } if (mbs_needle.len <= 0) { raise_warning("Empty delimiter."); return false; } mbfl_string result, *ret = nullptr; int n = mbfl_strpos(&mbs_haystack, &mbs_needle, 0, 0); if (n >= 0) { int mblen = 
mbfl_strlen(&mbs_haystack); if (part) { ret = mbfl_substr(&mbs_haystack, &result, 0, n); } else { int len = (mblen - n); ret = mbfl_substr(&mbs_haystack, &result, n, len); } } if (ret != nullptr) { return String(reinterpret_cast<char*>(ret->val), ret->len, AttachString); } return false; } const StaticString s_utf_8("utf-8"); /** * Fast check for the most common form of the UTF-8 encoding identifier. */ ALWAYS_INLINE static bool isUtf8(const Variant& encoding) { return encoding.getStringDataOrNull() == s_utf_8.get(); } /** * Given a byte sequence, return * 0 if it contains bytes >= 128 (thus non-ASCII), else * -1 if it contains any upper-case character ('A'-'Z'), else * 1 (and thus is a lower-case ASCII string). */ ALWAYS_INLINE static int isUtf8AsciiLower(folly::StringPiece s) { const auto bytelen = s.size(); bool caseOK = true; for (uint32_t i = 0; i < bytelen; ++i) { uint8_t byte = s[i]; if (byte >= 128) { return 0; } else if (byte <= 'Z' && byte >= 'A') { caseOK = false; } } return caseOK ? 1 : -1; } /** * Return a string containing the lower-case of a given ASCII string. */ ALWAYS_INLINE static StringData* asciiToLower(const StringData* s) { const auto size = s->size(); auto ret = StringData::Make(s, CopyString); auto output = ret->mutableData(); for (int i = 0; i < size; ++i) { auto& c = output[i]; if (c <= 'Z' && c >= 'A') { c |= 0x20; } } ret->invalidateHash(); // We probably modified it. return ret; } /* Like isUtf8AsciiLower, but with upper/lower swapped. */ ALWAYS_INLINE static int isUtf8AsciiUpper(folly::StringPiece s) { const auto bytelen = s.size(); bool caseOK = true; for (uint32_t i = 0; i < bytelen; ++i) { uint8_t byte = s[i]; if (byte >= 128) { return 0; } else if (byte >= 'a' && byte <= 'z') { caseOK = false; } } return caseOK ? 1 : -1; } /* Like asciiToLower, but with upper/lower swapped. 
*/ ALWAYS_INLINE static StringData* asciiToUpper(const StringData* s) { const auto size = s->size(); auto ret = StringData::Make(s, CopyString); auto output = ret->mutableData(); for (int i = 0; i < size; ++i) { auto& c = output[i]; if (c >= 'a' && c <= 'z') { c -= (char)0x20; } } ret->invalidateHash(); // We probably modified it. return ret; } Variant HHVM_FUNCTION(mb_strtolower, const String& str, const Variant& opt_encoding) { /* Fast-case for empty static string without dereferencing any pointers. */ if (str.get() == staticEmptyString()) return empty_string_variant(); if (LIKELY(isUtf8(opt_encoding))) { /* Fast-case for ASCII. */ if (auto sd = str.get()) { auto sl = sd->slice(); auto r = isUtf8AsciiLower(sl); if (r > 0) { return str; } else if (r < 0) { return String::attach(asciiToLower(sd)); } } } const String encoding = convertArg(opt_encoding); const char *from_encoding; if (encoding.empty()) { from_encoding = MBSTRG(current_internal_encoding)->mime_name; } else { from_encoding = encoding.data(); } unsigned int ret_len; char *newstr = php_unicode_convert_case(PHP_UNICODE_CASE_LOWER, str.data(), str.size(), &ret_len, from_encoding); if (newstr) { return String(newstr, ret_len, AttachString); } return false; } Variant HHVM_FUNCTION(mb_strtoupper, const String& str, const Variant& opt_encoding) { /* Fast-case for empty static string without dereferencing any pointers. */ if (str.get() == staticEmptyString()) return empty_string_variant(); if (LIKELY(isUtf8(opt_encoding))) { /* Fast-case for ASCII. 
*/ if (auto sd = str.get()) { auto sl = sd->slice(); auto r = isUtf8AsciiUpper(sl); if (r > 0) { return str; } else if (r < 0) { return String::attach(asciiToUpper(sd)); } } } const String encoding = convertArg(opt_encoding); const char *from_encoding; if (encoding.empty()) { from_encoding = MBSTRG(current_internal_encoding)->mime_name; } else { from_encoding = encoding.data(); } unsigned int ret_len; char *newstr = php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, str.data(), str.size(), &ret_len, from_encoding); if (newstr) { return String(newstr, ret_len, AttachString); } return false; } Variant HHVM_FUNCTION(mb_strwidth, const String& str, const Variant& opt_encoding) { const String encoding = convertArg(opt_encoding); mbfl_string string; mbfl_string_init(&string); string.no_language = MBSTRG(current_language); string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; string.val = (unsigned char *)str.data(); string.len = str.size(); if (!encoding.empty()) { string.no_encoding = mbfl_name2no_encoding(encoding.data()); if (string.no_encoding == mbfl_no_encoding_invalid) { raise_warning("Unknown encoding \"%s\"", encoding.data()); return false; } } int n = mbfl_strwidth(&string); if (n >= 0) { return n; } return false; } Variant HHVM_FUNCTION(mb_substitute_character, const Variant& substrchar /* = uninit_variant */) { if (substrchar.isNull()) { switch (MBSTRG(current_filter_illegal_mode)) { case MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE: return "none"; case MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG: return "long"; case MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY: return "entity"; default: return MBSTRG(current_filter_illegal_substchar); } } if (substrchar.isString()) { String s = substrchar.toString(); if (strcasecmp("none", s.data()) == 0) { MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE; return true; } if (strcasecmp("long", s.data()) == 0) { MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG; return true; } if 
(strcasecmp("entity", s.data()) == 0) { MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY; return true; } } int64_t n = substrchar.toInt64(); if (n < 0xffff && n > 0) { MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR; MBSTRG(current_filter_illegal_substchar) = n; } else { raise_warning("Unknown character."); return false; } return true; } Variant HHVM_FUNCTION(mb_substr_count, const String& haystack, const String& needle, const Variant& opt_encoding) { const String encoding = convertArg(opt_encoding); mbfl_string mbs_haystack; mbfl_string_init(&mbs_haystack); mbs_haystack.no_language = MBSTRG(current_language); mbs_haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; mbs_haystack.val = (unsigned char *)haystack.data(); mbs_haystack.len = haystack.size(); mbfl_string mbs_needle; mbfl_string_init(&mbs_needle); mbs_needle.no_language = MBSTRG(current_language); mbs_needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; mbs_needle.val = (unsigned char *)needle.data(); mbs_needle.len = needle.size(); if (!encoding.empty()) { mbs_haystack.no_encoding = mbs_needle.no_encoding = mbfl_name2no_encoding(encoding.data()); if (mbs_haystack.no_encoding == mbfl_no_encoding_invalid) { raise_warning("Unknown encoding \"%s\"", encoding.data()); return false; } } if (mbs_needle.len <= 0) { raise_warning("Empty substring."); return false; } int n = mbfl_substr_count(&mbs_haystack, &mbs_needle); if (n >= 0) { return n; } return false; } /////////////////////////////////////////////////////////////////////////////// // regex helpers typedef struct _php_mb_regex_enc_name_map_t { const char *names; OnigEncoding code; } php_mb_regex_enc_name_map_t; static php_mb_regex_enc_name_map_t enc_name_map[] ={ { "EUC-JP\0EUCJP\0X-EUC-JP\0UJIS\0EUCJP\0EUCJP-WIN\0", ONIG_ENCODING_EUC_JP }, { "UTF-8\0UTF8\0", ONIG_ENCODING_UTF8 }, { "UTF-16\0UTF-16BE\0", ONIG_ENCODING_UTF16_BE }, { "UTF-16LE\0", ONIG_ENCODING_UTF16_LE 
}, { "UCS-4\0UTF-32\0UTF-32BE\0", ONIG_ENCODING_UTF32_BE }, { "UCS-4LE\0UTF-32LE\0", ONIG_ENCODING_UTF32_LE }, { "SJIS\0CP932\0MS932\0SHIFT_JIS\0SJIS-WIN\0WINDOWS-31J\0", ONIG_ENCODING_SJIS }, { "BIG5\0BIG-5\0BIGFIVE\0CN-BIG5\0BIG-FIVE\0", ONIG_ENCODING_BIG5 }, { "EUC-CN\0EUCCN\0EUC_CN\0GB-2312\0GB2312\0", ONIG_ENCODING_EUC_CN }, { "EUC-TW\0EUCTW\0EUC_TW\0", ONIG_ENCODING_EUC_TW }, { "EUC-KR\0EUCKR\0EUC_KR\0", ONIG_ENCODING_EUC_KR }, { "KOI8R\0KOI8-R\0KOI-8R\0", ONIG_ENCODING_KOI8_R }, { "ISO-8859-1\0ISO8859-1\0ISO_8859_1\0ISO8859_1\0", ONIG_ENCODING_ISO_8859_1 }, { "ISO-8859-2\0ISO8859-2\0ISO_8859_2\0ISO8859_2\0", ONIG_ENCODING_ISO_8859_2 }, { "ISO-8859-3\0ISO8859-3\0ISO_8859_3\0ISO8859_3\0", ONIG_ENCODING_ISO_8859_3 }, { "ISO-8859-4\0ISO8859-4\0ISO_8859_4\0ISO8859_4\0", ONIG_ENCODING_ISO_8859_4 }, { "ISO-8859-5\0ISO8859-5\0ISO_8859_5\0ISO8859_5\0", ONIG_ENCODING_ISO_8859_5 }, { "ISO-8859-6\0ISO8859-6\0ISO_8859_6\0ISO8859_6\0", ONIG_ENCODING_ISO_8859_6 }, { "ISO-8859-7\0ISO8859-7\0ISO_8859_7\0ISO8859_7\0", ONIG_ENCODING_ISO_8859_7 }, { "ISO-8859-8\0ISO8859-8\0ISO_8859_8\0ISO8859_8\0", ONIG_ENCODING_ISO_8859_8 }, { "ISO-8859-9\0ISO8859-9\0ISO_8859_9\0ISO8859_9\0", ONIG_ENCODING_ISO_8859_9 }, { "ISO-8859-10\0ISO8859-10\0ISO_8859_10\0ISO8859_10\0", ONIG_ENCODING_ISO_8859_10 }, { "ISO-8859-11\0ISO8859-11\0ISO_8859_11\0ISO8859_11\0", ONIG_ENCODING_ISO_8859_11 }, { "ISO-8859-13\0ISO8859-13\0ISO_8859_13\0ISO8859_13\0", ONIG_ENCODING_ISO_8859_13 }, { "ISO-8859-14\0ISO8859-14\0ISO_8859_14\0ISO8859_14\0", ONIG_ENCODING_ISO_8859_14 }, { "ISO-8859-15\0ISO8859-15\0ISO_8859_15\0ISO8859_15\0", ONIG_ENCODING_ISO_8859_15 }, { "ISO-8859-16\0ISO8859-16\0ISO_8859_16\0ISO8859_16\0", ONIG_ENCODING_ISO_8859_16 }, { "ASCII\0US-ASCII\0US_ASCII\0ISO646\0", ONIG_ENCODING_ASCII }, { nullptr, ONIG_ENCODING_UNDEF } }; static OnigEncoding php_mb_regex_name2mbctype(const char *pname) { const char *p; php_mb_regex_enc_name_map_t *mapping; if (pname == nullptr) { return ONIG_ENCODING_UNDEF; } for 
(mapping = enc_name_map; mapping->names != nullptr; mapping++) { for (p = mapping->names; *p != '\0'; p += (strlen(p) + 1)) { if (strcasecmp(p, pname) == 0) { return mapping->code; } } } return ONIG_ENCODING_UNDEF; } static const char *php_mb_regex_mbctype2name(OnigEncoding mbctype) { php_mb_regex_enc_name_map_t *mapping; for (mapping = enc_name_map; mapping->names != nullptr; mapping++) { if (mapping->code == mbctype) { return mapping->names; } } return nullptr; } /* * regex cache */ static php_mb_regex_t *php_mbregex_compile_pattern(const String& pattern, OnigOptionType options, OnigEncoding enc, OnigSyntaxType *syntax) { int err_code = 0; OnigErrorInfo err_info; OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN]; php_mb_regex_t *rc = nullptr; std::string spattern = std::string(pattern.data(), pattern.size()); RegexCache &cache = MBSTRG(ht_rc); RegexCache::const_iterator it = cache.find(spattern); if (it != cache.end()) { rc = it->second; } if (!rc || onig_get_options(rc) != options || onig_get_encoding(rc) != enc || onig_get_syntax(rc) != syntax) { if (rc) { onig_free(rc); rc = nullptr; } if ((err_code = onig_new(&rc, (OnigUChar *)pattern.data(), (OnigUChar *)(pattern.data() + pattern.size()), options,enc, syntax, &err_info)) != ONIG_NORMAL) { onig_error_code_to_str(err_str, err_code, &err_info); raise_warning("mbregex compile err: %s", err_str); return nullptr; } MBSTRG(ht_rc)[spattern] = rc; } return rc; } static size_t _php_mb_regex_get_option_string(char *str, size_t len, OnigOptionType option, OnigSyntaxType *syntax) { size_t len_left = len; size_t len_req = 0; char *p = str; char c; if ((option & ONIG_OPTION_IGNORECASE) != 0) { if (len_left > 0) { --len_left; *(p++) = 'i'; } ++len_req; } if ((option & ONIG_OPTION_EXTEND) != 0) { if (len_left > 0) { --len_left; *(p++) = 'x'; } ++len_req; } if ((option & (ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE)) == (ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE)) { if (len_left > 0) { --len_left; *(p++) = 'p'; } 
++len_req; } else { if ((option & ONIG_OPTION_MULTILINE) != 0) { if (len_left > 0) { --len_left; *(p++) = 'm'; } ++len_req; } if ((option & ONIG_OPTION_SINGLELINE) != 0) { if (len_left > 0) { --len_left; *(p++) = 's'; } ++len_req; } } if ((option & ONIG_OPTION_FIND_LONGEST) != 0) { if (len_left > 0) { --len_left; *(p++) = 'l'; } ++len_req; } if ((option & ONIG_OPTION_FIND_NOT_EMPTY) != 0) { if (len_left > 0) { --len_left; *(p++) = 'n'; } ++len_req; } c = 0; if (syntax == ONIG_SYNTAX_JAVA) { c = 'j'; } else if (syntax == ONIG_SYNTAX_GNU_REGEX) { c = 'u'; } else if (syntax == ONIG_SYNTAX_GREP) { c = 'g'; } else if (syntax == ONIG_SYNTAX_EMACS) { c = 'c'; } else if (syntax == ONIG_SYNTAX_RUBY) { c = 'r'; } else if (syntax == ONIG_SYNTAX_PERL) { c = 'z'; } else if (syntax == ONIG_SYNTAX_POSIX_BASIC) { c = 'b'; } else if (syntax == ONIG_SYNTAX_POSIX_EXTENDED) { c = 'd'; } if (c != 0) { if (len_left > 0) { --len_left; *(p++) = c; } ++len_req; } if (len_left > 0) { --len_left; *(p++) = '\0'; } ++len_req; if (len < len_req) { return len_req; } return 0; } static void _php_mb_regex_init_options(const char *parg, int narg, OnigOptionType *option, OnigSyntaxType **syntax, int *eval) { int n; char c; int optm = 0; *syntax = ONIG_SYNTAX_RUBY; if (parg != nullptr) { n = 0; while (n < narg) { c = parg[n++]; switch (c) { case 'i': optm |= ONIG_OPTION_IGNORECASE; break; case 'x': optm |= ONIG_OPTION_EXTEND; break; case 'm': optm |= ONIG_OPTION_MULTILINE; break; case 's': optm |= ONIG_OPTION_SINGLELINE; break; case 'p': optm |= ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE; break; case 'l': optm |= ONIG_OPTION_FIND_LONGEST; break; case 'n': optm |= ONIG_OPTION_FIND_NOT_EMPTY; break; case 'j': *syntax = ONIG_SYNTAX_JAVA; break; case 'u': *syntax = ONIG_SYNTAX_GNU_REGEX; break; case 'g': *syntax = ONIG_SYNTAX_GREP; break; case 'c': *syntax = ONIG_SYNTAX_EMACS; break; case 'r': *syntax = ONIG_SYNTAX_RUBY; break; case 'z': *syntax = ONIG_SYNTAX_PERL; break; case 'b': *syntax = 
ONIG_SYNTAX_POSIX_BASIC;
          break;
        case 'd':
          *syntax = ONIG_SYNTAX_POSIX_EXTENDED;
          break;
        case 'e':
          /* 'e' only sets the eval flag when the caller supplied one. */
          if (eval != nullptr) *eval = 1;
          break;
        default:
          /* Unrecognised option characters are ignored. */
          break;
      }
    }
    /* Parsed bits are OR-ed into whatever *option already holds. */
    if (option != nullptr) *option|=optm;
  }
}

///////////////////////////////////////////////////////////////////////////////
// regex functions

/**
 * Returns true when onig_match() reports a match of `pattern` anchored at the
 * first byte of `str`, false when there is no match or the pattern fails to
 * compile.
 *
 * A non-empty option string replaces the default options/syntax; an empty
 * one selects MBSTRG(regex_default_options) / MBSTRG(regex_default_syntax).
 */
bool HHVM_FUNCTION(mb_ereg_match,
                   const String& pattern,
                   const String& str,
                   const Variant& opt_option) {
  const String option = convertArg(opt_option);

  OnigSyntaxType *syntax;
  OnigOptionType noption = 0;
  if (!option.empty()) {
    _php_mb_regex_init_options(option.data(), option.size(),
                               &noption, &syntax, nullptr);
  } else {
    noption |= MBSTRG(regex_default_options);
    syntax = MBSTRG(regex_default_syntax);
  }

  php_mb_regex_t *re;
  if ((re = php_mbregex_compile_pattern
       (pattern, noption, MBSTRG(current_mbctype), syntax)) == nullptr) {
    return false;
  }

  /* match */
  int err = onig_match(re, (OnigUChar *)str.data(),
                       (OnigUChar *)(str.data() + str.size()),
                       (OnigUChar *)str.data(), nullptr, 0);
  return err >= 0;
}

/**
 * Shared implementation of mb_ereg_replace / mb_eregi_replace.
 *
 * Replaces every match of `pattern` in `str` with `replacement`, where a
 * backslash followed by a single digit 0-9 in the replacement is substituted
 * by the corresponding capture group.
 *
 * @param pattern      string pattern, or a non-string whose toByte() value
 *                     becomes a one-byte pattern
 * @param replacement  replacement text
 * @param str          subject string
 * @param option       option letters; empty means use the defaults
 * @param options      option bits the caller wants OR-ed in
 * @return the rewritten string, or false when the current regex encoding
 *         cannot be resolved, the pattern does not compile, or onig_search()
 *         reports an error (<= -2)
 */
static Variant _php_mb_regex_ereg_replace_exec(const Variant& pattern,
                                               const String& replacement,
                                               const String& str,
                                               const String& option,
                                               OnigOptionType options) {
  const char *p;
  php_mb_regex_t *re;
  OnigSyntaxType *syntax;
  OnigRegion *regs = nullptr;
  StringBuffer out_buf;
  int i, err, eval, n;
  OnigUChar *pos;
  OnigUChar *string_lim;
  char pat_buf[2];

  const mbfl_encoding *enc;

  {
    /* Resolve the mbfl encoding matching the current regex encoding; it is
       used below to step through the replacement one character at a time. */
    const char *current_enc_name;
    current_enc_name = php_mb_regex_mbctype2name(MBSTRG(current_mbctype));
    if (current_enc_name == nullptr ||
        (enc = mbfl_name2encoding(current_enc_name)) == nullptr) {
      raise_warning("Unknown error");
      return false;
    }
  }
  eval = 0;
  {
    if (!option.empty()) {
      _php_mb_regex_init_options(option.data(), option.size(),
                                 &options, &syntax, &eval);
    } else {
      options |= MBSTRG(regex_default_options);
      syntax = MBSTRG(regex_default_syntax);
    }
  }

  String spattern;
  if (pattern.isString()) {
    spattern = pattern.toString();
  } else {
    /* FIXME: this code is not multibyte aware! */
    pat_buf[0] = pattern.toByte();
    pat_buf[1] = '\0';
    spattern = String(pat_buf, 1, CopyString);
  }
  /* create regex pattern buffer */
  re = php_mbregex_compile_pattern(spattern, options,
                                   MBSTRG(current_mbctype), syntax);
  if (re == nullptr) {
    return false;
  }

  /* The 'e' option is rejected here, before any region is allocated. */
  if (eval) {
    throw_not_supported("ereg_replace", "dynamic coding");
  }

  /* do the actual work */
  err = 0;
  pos = (OnigUChar*)str.data();
  string_lim = (OnigUChar*)(str.data() + str.size());
  regs = onig_region_new();
  while (err >= 0) {
    err = onig_search(re, (OnigUChar *)str.data(), (OnigUChar *)string_lim,
                      pos, (OnigUChar *)string_lim, regs, 0);
    if (err <= -2) {
      OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
      onig_error_code_to_str(err_str, err);
      raise_warning("mbregex search failure: %s", err_str);
      break;
    }
    if (err >= 0) {
#if moriyoshi_0
      if (regs->beg[0] == regs->end[0]) {
        raise_warning("Empty regular expression");
        break;
      }
#endif
      /* copy the part of the string before the match */
      out_buf.append((const char *)pos,
                     (OnigUChar *)(str.data() + regs->beg[0]) - pos);
      /* copy replacement and backrefs */
      i = 0;
      p = replacement.data();
      while (i < replacement.size()) {
        /* fwd: byte length of the character at p in the regex encoding. */
        int fwd = (int)php_mb_mbchar_bytes_ex(p, enc);
        n = -1;
        auto const remaining = replacement.size() - i;
        /* A backreference is a single-byte '\' followed by one digit. */
        if (remaining >= 2 && fwd == 1 &&
            p[0] == '\\' && p[1] >= '0' && p[1] <= '9') {
          n = p[1] - '0';
        }
        if (n >= 0 && n < regs->num_regs) {
          /* Groups that are unset, empty or out of range append nothing. */
          if (regs->beg[n] >= 0 && regs->beg[n] < regs->end[n] &&
              regs->end[n] <= str.size()) {
            out_buf.append(str.data() + regs->beg[n],
                           regs->end[n] - regs->beg[n]);
          }
          p += 2;
          i += 2;
        } else if (remaining >= fwd) {
          out_buf.append(p, fwd);
          p += fwd;
          i += fwd;
        } else {
          /* The last character claims more bytes than are left. */
          raise_warning("Replacement ends with unterminated %s: 0x%hhx",
                        enc->name, *p);
          break;
        }
      }
      n = regs->end[0];
      if ((pos - (OnigUChar *)str.data()) < n) {
        pos = (OnigUChar *)(str.data() + n);
      } else {
        /* The match did not advance past pos: copy one byte through and
           step forward so the loop makes progress. */
        if (pos < string_lim) {
          out_buf.append((const char *)pos, 1);
        }
        pos++;
      }
    } else { /* nomatch */
      /* stick that last bit of string on our output */
      if (string_lim - pos > 0) {
        out_buf.append((const char *)pos, string_lim - pos);
      }
    }
    onig_region_free(regs, 0);
  }

  if (regs != nullptr) {
    onig_region_free(regs, 1);
  }

  if (err <= -2) {
    return false;
  }
  return out_buf.detach();
}

/**
 * Regex replace; delegates to _php_mb_regex_ereg_replace_exec with no extra
 * option bits.
 */
Variant HHVM_FUNCTION(mb_ereg_replace,
                      const Variant& pattern,
                      const String& replacement,
                      const String& str,
                      const Variant& opt_option) {
  const String option = convertArg(opt_option);
  return _php_mb_regex_ereg_replace_exec(pattern, replacement,
                                         str, option, 0);
}

/**
 * Regex replace; delegates to _php_mb_regex_ereg_replace_exec with
 * ONIG_OPTION_IGNORECASE passed as the extra option bits.
 */
Variant HHVM_FUNCTION(mb_eregi_replace,
                      const Variant& pattern,
                      const String& replacement,
                      const String& str,
                      const Variant& opt_option) {
  const String option = convertArg(opt_option);
  return _php_mb_regex_ereg_replace_exec(pattern, replacement,
                                         str, option, ONIG_OPTION_IGNORECASE);
}

/** Returns the stored search position MBSTRG(search_pos). */
int64_t HHVM_FUNCTION(mb_ereg_search_getpos) {
  return MBSTRG(search_pos);
}

/**
 * Sets the stored search position. A position outside
 * [0, size of the stored search string) raises a warning, resets the
 * position to 0 and returns false.
 */
bool HHVM_FUNCTION(mb_ereg_search_setpos,
                   int position) {
  if (position < 0 || position >= (int)MBSTRG(search_str).size()) {
    raise_warning("Position is out of range");
    MBSTRG(search_pos) = 0;
    return false;
  }
  MBSTRG(search_pos) = position;
  return true;
}

/**
 * Returns the capture groups stored in MBSTRG(search_regs) as a vec of
 * substrings of the stored search string; a group whose bounds are invalid
 * becomes false. Returns false when there are no stored regions or the
 * stored search string is empty.
 */
Variant HHVM_FUNCTION(mb_ereg_search_getregs) {
  OnigRegion *search_regs = MBSTRG(search_regs);
  if (search_regs && !MBSTRG(search_str).empty()) {
    Array ret = Array::CreateVec();
    OnigUChar *str = (OnigUChar *)MBSTRG(search_str).data();
    int len = MBSTRG(search_str).size();
    int n = search_regs->num_regs;
    for (int i = 0; i < n; i++) {
      int beg = search_regs->beg[i];
      int end = search_regs->end[i];
      if (beg >= 0 && beg <= end && end <= len) {
        ret.append(String((const char *)(str + beg), end - beg, CopyString));
      } else {
        ret.append(false);
      }
    }
    return ret;
  }
  return false;
}

bool HHVM_FUNCTION(mb_ereg_search_init,
                   const String& str,
                   const Variant& opt_pattern,
                   const Variant& opt_option) {
  const String pattern = convertArg(opt_pattern);
  const String option = convertArg(opt_option);

  OnigOptionType noption = MBSTRG(regex_default_options);
  OnigSyntaxType *syntax = MBSTRG(regex_default_syntax);
  if (!option.empty()) {
    noption
= 0;
    _php_mb_regex_init_options(option.data(), option.size(),
                               &noption, &syntax, nullptr);
  }
  if (!pattern.empty()) {
    if ((MBSTRG(search_re) = php_mbregex_compile_pattern
         (pattern, noption, MBSTRG(current_mbctype), syntax)) == nullptr) {
      return false;
    }
  }

  /* Store a copy of the subject and reset position and previous regions. */
  MBSTRG(search_str) = std::string(str.data(), str.size());
  MBSTRG(search_pos) = 0;

  if (MBSTRG(search_regs) != nullptr) {
    onig_region_free(MBSTRG(search_regs), 1);
    MBSTRG(search_regs) = (OnigRegion *)nullptr;
  }
  return true;
}

/* regex search */
/**
 * Shared implementation of mb_ereg_search / _pos / _regs.
 *
 * Runs the stored (or newly compiled) regex against the stored search string
 * starting at the stored position, then advances the stored position.
 *
 * @param pattern  when non-empty, compiled and stored as the search regex
 * @param option   option letters; empty means use the defaults
 * @param mode     1: return vec [begin, length] of the whole match;
 *                 2: return vec of capture-group substrings (false for
 *                    groups with invalid bounds);
 *                 anything else: return true
 * @return the mode-dependent value on a match; false on mismatch, on a
 *         search error, when compilation fails, or when no regex or no
 *         string has been stored (the latter two with a warning)
 */
static Variant _php_mb_regex_ereg_search_exec(const String& pattern,
                                              const String& option,
                                              int mode) {
  int n, i, err, pos, len, beg, end;
  OnigUChar *str;
  OnigSyntaxType *syntax = MBSTRG(regex_default_syntax);
  OnigOptionType noption;

  noption = MBSTRG(regex_default_options);
  if (!option.empty()) {
    noption = 0;
    _php_mb_regex_init_options(option.data(), option.size(),
                               &noption, &syntax, nullptr);
  }
  if (!pattern.empty()) {
    if ((MBSTRG(search_re) = php_mbregex_compile_pattern
         (pattern, noption, MBSTRG(current_mbctype), syntax)) == nullptr) {
      return false;
    }
  }

  pos = MBSTRG(search_pos);
  str = nullptr;
  len = 0;
  /* An empty stored string is treated the same as no string at all. */
  if (!MBSTRG(search_str).empty()) {
    str = (OnigUChar *)MBSTRG(search_str).data();
    len = MBSTRG(search_str).size();
  }

  if (MBSTRG(search_re) == nullptr) {
    raise_warning("No regex given");
    return false;
  }

  if (str == nullptr) {
    raise_warning("No string given");
    return false;
  }

  /* Regions from a previous search are replaced by a fresh set. */
  if (MBSTRG(search_regs)) {
    onig_region_free(MBSTRG(search_regs), 1);
  }
  MBSTRG(search_regs) = onig_region_new();

  err = onig_search(MBSTRG(search_re), str, str + len, str + pos, str + len,
                    MBSTRG(search_regs), 0);
  Variant ret;
  if (err == ONIG_MISMATCH) {
    /* No further match: park the position at the end of the string. */
    MBSTRG(search_pos) = len;
    ret = false;
  } else if (err <= -2) {
    OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
    onig_error_code_to_str(err_str, err);
    raise_warning("mbregex search failure in mbregex_search(): %s", err_str);
    ret = false;
  } else {
    /* A zero-length match only warns; the result is still produced. */
    if (MBSTRG(search_regs)->beg[0] == MBSTRG(search_regs)->end[0]) {
      raise_warning("Empty regular expression");
    }
    switch (mode) {
    case 1:
      {
        beg = MBSTRG(search_regs)->beg[0];
        end = MBSTRG(search_regs)->end[0];
        ret = make_vec_array(beg, end - beg);
      }
      break;
    case 2:
      n = MBSTRG(search_regs)->num_regs;
      ret = Variant(Array::CreateVec());
      for (i = 0; i < n; i++) {
        beg = MBSTRG(search_regs)->beg[i];
        end = MBSTRG(search_regs)->end[i];
        if (beg >= 0 && beg <= end && end <= len) {
          ret.asArrRef().append(
            String((const char *)(str + beg), end - beg, CopyString));
        } else {
          ret.asArrRef().append(false);
        }
      }
      break;
    default:
      ret = true;
      break;
    }
    /* Continue after the match, or one past pos if the match end would not
       move the position forward. */
    end = MBSTRG(search_regs)->end[0];
    if (pos < end) {
      MBSTRG(search_pos) = end;
    } else {
      MBSTRG(search_pos) = pos + 1;
    }
  }

  /* Regions are only kept around after a successful search. */
  if (err < 0) {
    onig_region_free(MBSTRG(search_regs), 1);
    MBSTRG(search_regs) = (OnigRegion *)nullptr;
  }
  return ret;
}

/** Search step returning true/false (mode 0 of the shared helper). */
Variant HHVM_FUNCTION(mb_ereg_search,
                      const Variant& opt_pattern,
                      const Variant& opt_option) {
  const String pattern = convertArg(opt_pattern);
  const String option = convertArg(opt_option);
  return _php_mb_regex_ereg_search_exec(pattern, option, 0);
}

/** Search step returning [begin, length] (mode 1 of the shared helper). */
Variant HHVM_FUNCTION(mb_ereg_search_pos,
                      const Variant& opt_pattern,
                      const Variant& opt_option) {
  const String pattern = convertArg(opt_pattern);
  const String option = convertArg(opt_option);
  return _php_mb_regex_ereg_search_exec(pattern, option, 1);
}

/** Search step returning the capture groups (mode 2 of the shared helper). */
Variant HHVM_FUNCTION(mb_ereg_search_regs,
                      const Variant& opt_pattern,
                      const Variant& opt_option) {
  const String pattern = convertArg(opt_pattern);
  const String option = convertArg(opt_option);
  return _php_mb_regex_ereg_search_exec(pattern, option, 2);
}

/**
 * Shared implementation of mb_ereg / mb_eregi.
 *
 * Searches `str` for `pattern` using the default options (plus
 * ONIG_OPTION_IGNORECASE when icase is non-zero) and the default syntax.
 *
 * @param pattern  string pattern; a double is truncated via toInt64() before
 *                 being turned into a string, other values use toString()
 * @param str      subject string
 * @param regs     when non-null, receives a vec of capture-group substrings
 *                 (false for unset, empty or out-of-range groups); it is left
 *                 untouched when the function returns false
 * @param icase    non-zero for case-insensitive matching
 * @return the byte length of the whole match (1 if that length is 0), or
 *         false when compilation fails or onig_search() returns a negative
 *         value
 */
static Variant _php_mb_regex_ereg_exec(const Variant& pattern,
                                       const String& str,
                                       Variant *regs, int icase) {
  php_mb_regex_t *re;
  OnigRegion *regions = nullptr;
  int i, match_len, beg, end;
  OnigOptionType options;

  options = MBSTRG(regex_default_options);
  if (icase) {
    options |= ONIG_OPTION_IGNORECASE;
  }

  /* compile the regular expression from the supplied regex */
  String spattern;
  if (!pattern.isString()) {
    /* we convert numbers to integers and treat them as a string */
    if (pattern.is(KindOfDouble)) {
      spattern = String(pattern.toInt64()); /* get rid of decimal places */
    } else {
      spattern = pattern.toString();
    }
  } else {
    spattern = pattern.toString();
  }
  re = php_mbregex_compile_pattern(spattern, options, MBSTRG(current_mbctype),
                                   MBSTRG(regex_default_syntax));
  if (re == nullptr) {
    return false;
  }

  regions = onig_region_new();

  /* actually execute the regular expression */
  if (onig_search(re, (OnigUChar *)str.data(),
                  (OnigUChar *)(str.data() + str.size()),
                  (OnigUChar *)str.data(),
                  (OnigUChar *)(str.data() + str.size()),
                  regions, 0) < 0) {
    onig_region_free(regions, 1);
    return false;
  }

  const char *s = str.data();
  int string_len = str.size();
  match_len = regions->end[0] - regions->beg[0];

  VecInit regsPai(regions->num_regs);
  for (i = 0; i < regions->num_regs; i++) {
    beg = regions->beg[i];
    end = regions->end[i];
    /* Strict beg < end: empty groups are reported as false here. */
    if (beg >= 0 && beg < end && end <= string_len) {
      regsPai.append(String(s + beg, end - beg, CopyString));
    } else {
      regsPai.append(false);
    }
  }

  if (regs) *regs = regsPai.toArray();

  /* A zero-length match is reported as 1 so the result is never 0. */
  if (match_len == 0) {
    match_len = 1;
  }
  if (regions != nullptr) {
    onig_region_free(regions, 1);
  }
  return match_len;
}

/** Regex match storing groups in regs (case-sensitive helper call). */
Variant HHVM_FUNCTION(mb_ereg,
                      const Variant& pattern,
                      const String& str,
                      Variant& regs) {
  return _php_mb_regex_ereg_exec(pattern, str, &regs, 0);
}

/** Regex match storing groups in regs (case-insensitive helper call). */
Variant HHVM_FUNCTION(mb_eregi,
                      const Variant& pattern,
                      const String& str,
                      Variant& regs) {
  return _php_mb_regex_ereg_exec(pattern, str, &regs, 1);
}

/**
 * Getter/setter for the regex encoding MBSTRG(current_mbctype).
 *
 * With an empty argument, returns the name of the current encoding, or
 * false when it has no entry in the name table. Otherwise sets the encoding
 * and returns true, or warns and returns false for an unknown name.
 */
Variant HHVM_FUNCTION(mb_regex_encoding,
                      const Variant& opt_encoding) {
  const String encoding = convertArg(opt_encoding);
  if (encoding.empty()) {
    const char *retval = php_mb_regex_mbctype2name(MBSTRG(current_mbctype));
    if (retval != nullptr) {
      return String(retval, CopyString);
    }
    return false;
  }

  OnigEncoding mbctype = php_mb_regex_name2mbctype(encoding.data());
  if (mbctype == ONIG_ENCODING_UNDEF) {
    raise_warning("Unknown encoding \"%s\"", encoding.data());
    return false;
  }

  MBSTRG(current_mbctype) = mbctype;
  return true;
}

static void
php_mb_regex_set_options(OnigOptionType options, OnigSyntaxType *syntax,
                         OnigOptionType *prev_options,
                         OnigSyntaxType **prev_syntax) {
  /* Installs `options` / `syntax` as the default regex options and syntax.
   * When prev_options / prev_syntax are non-null they first receive the
   * values that were in effect before the call. */
  if (prev_options != nullptr) {
    *prev_options = MBSTRG(regex_default_options);
  }
  if (prev_syntax != nullptr) {
    *prev_syntax = MBSTRG(regex_default_syntax);
  }
  MBSTRG(regex_default_options) = options;
  MBSTRG(regex_default_syntax) = syntax;
}

/*
 * mb_regex_set_options(): getter/setter for the default regex options.
 *
 * A non-empty argument is parsed by _php_mb_regex_init_options() and the
 * result is installed as the new default; an empty argument just reads the
 * current defaults.  Either way the options in effect are rendered by
 * _php_mb_regex_get_option_string() into a 16-byte buffer and returned.
 */
String HHVM_FUNCTION(mb_regex_set_options,
                     const Variant& opt_options) {
  const String options = convertArg(opt_options);
  OnigOptionType opt;
  OnigSyntaxType *syntax;
  char buf[16];

  if (!options.empty()) {
    opt = 0;
    syntax = nullptr;
    _php_mb_regex_init_options(options.data(), options.size(),
                               &opt, &syntax, nullptr);
    php_mb_regex_set_options(opt, syntax, nullptr, nullptr);
  } else {
    opt = MBSTRG(regex_default_options);
    syntax = MBSTRG(regex_default_syntax);
  }
  _php_mb_regex_get_option_string(buf, sizeof(buf), opt, syntax);
  return String(buf, CopyString);
}

/*
 * mb_split(): splits `str` on matches of `pattern`.
 *
 * count limits the number of returned pieces: 0 is treated as 1 (no
 * splitting at all), and a negative value never reaches the loop's
 * zero test, so splitting continues until the search stops matching.
 *
 * Returns false when the pattern does not compile or the search reports an
 * error (err <= -2); otherwise a vec of the pieces, always ending with the
 * remainder after the last split point (possibly the empty string).
 */
Variant HHVM_FUNCTION(mb_split,
                      const String& pattern,
                      const String& str,
                      int count /* = -1 */) {
  php_mb_regex_t *re;
  OnigRegion *regs = nullptr;

  int n, err;
  if (count == 0) {
    count = 1;
  }

  /* create regex pattern buffer */
  if ((re = php_mbregex_compile_pattern(pattern,
                                        MBSTRG(regex_default_options),
                                        MBSTRG(current_mbctype),
                                        MBSTRG(regex_default_syntax)))
      == nullptr) {
    return false;
  }

  Array ret = Array::CreateVec();
  OnigUChar *pos0 = (OnigUChar *)str.data();
  OnigUChar *pos_end = (OnigUChar *)(str.data() + str.size());
  OnigUChar *pos = pos0;
  err = 0;
  /* NOTE(review): the result of onig_region_new() is not checked before
   * regs->beg / regs->end are read inside the loop -- confirm whether a
   * null return needs handling here. */
  regs = onig_region_new();
  /* churn through str, generating array entries as we go */
  while ((--count != 0) &&
         (err = onig_search(re, pos0, pos_end, pos, pos_end, regs, 0)) >= 0) {
    /* a zero-length match would never advance; warn and stop splitting */
    if (regs->beg[0] == regs->end[0]) {
      raise_warning("Empty regular expression");
      break;
    }

    /* add it to the array: the piece runs from the current position up to
     * the start of the match; offsets outside the subject or before the
     * current position are turned into error code -2 */
    if (regs->beg[0] < str.size() && regs->beg[0] >= (pos - pos0)) {
      ret.append(String((const char *)pos,
                        ((OnigUChar *)(str.data() + regs->beg[0]) - pos),
                        CopyString));
    } else {
      err = -2;
      break;
    }
    /* point at our new starting point */
    n = regs->end[0];
    if ((pos - pos0) < n) {
      pos = pos0 + n;
    }
    /* keep a negative (unlimited) count from counting down towards the
     * loop's `--count != 0` test */
    if (count < 0) {
      count = 0;
    }
    onig_region_free(regs, 0);
  }

  onig_region_free(regs, 1);

  /* see if we encountered an error */
  if (err <= -2) {
    OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
    onig_error_code_to_str(err_str, err);
    raise_warning("mbregex search failure in mbsplit(): %s", err_str);
    return false;
  }

  /* otherwise we just have one last element to add to the array */
  n = pos_end - pos;
  if (n > 0) {
    ret.append(String((const char *)pos, n, CopyString));
  } else {
    ret.append("");
  }
  return ret;
}

///////////////////////////////////////////////////////////////////////////////

/*
 * Skips a folded-header separator inside a loop over `str` indexed by `pos`.
 *
 * When str[pos] starts the sequence CR LF followed by a space or tab, `pos`
 * is moved onto the last character of that whitespace run and the enclosing
 * loop is continued, so its own increment steps past the run.  Otherwise the
 * macro does nothing.  It expands to an `if` containing `continue`, so it is
 * only usable directly inside a loop body, and both arguments are evaluated
 * several times.
 */
#define SKIP_LONG_HEADER_SEP_MBSTRING(str, pos) \
  if (str[pos] == '\r' && str[pos + 1] == '\n' && \
      (str[pos + 2] == ' ' || str[pos + 2] == '\t')) { \
    pos += 2; \
    while (str[pos + 1] == ' ' || str[pos + 1] == '\t') { \
      pos++; \
    } \
    continue; \
  }

/*
 * Parses a block of mail headers into `ht`.
 *
 * Walks the `str_len` bytes at `str` one character at a time.  Each
 * completed "Name: value" pair whose name and value are both non-empty is
 * stored in `ht` under the upper-cased name.  Parsing stops at the end of
 * the input or at a '\n' that is seen while already at the start of a line
 * (which includes a '\n' as the very first character).
 *
 * Returns the parser state at the point where parsing stopped.
 */
static int _php_mbstr_parse_mail_headers(Array &ht, const char *str,
                                         size_t str_len) {
  const char *ps;
  size_t icnt;
  int state = 0;       /* position within a header line, see diagram below */
  int crlf_state = -1; /* line-ending tracking, see diagram below */

  StringBuffer token;  /* text collected for the current name or value */
  String fld_name, fld_val;

  ps = str;
  icnt = str_len;

  /*
   *             C o n t e n t - T y p e :   t e x t / h t m l \r\n
   *             ^ ^^^^^^^^^^^^^^^^^^^^^ ^^^ ^^^^^^^^^^^^^^^^^ ^^^^
   *      state  0            1           2          3
   *
   *             C o n t e n t - T y p e :   t e x t / h t m l \r\n
   *             ^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^
   * crlf_state -1                       0                     1 -1
   *
   * Two further states are entered when a line starts with a space or tab:
   * 4 (continuation of the previous value) and 5 (line being skipped).
   */

  while (icnt > 0) {
    switch (*ps) {
    case ':':
      /* a '\r' that was held back and not followed by '\n' is literal */
      if (crlf_state == 1) {
        token.append('\r');
      }

      /* the first ':' on a line ends the field name; any other ':' is
       * ordinary text */
      if (state == 0 || state == 1) {
        fld_name = token.detach();
        state = 2;
      } else {
        token.append(*ps);
      }

      crlf_state = 0;
      break;

    case '\n':
      /* '\n' at the start of a line: stop parsing */
      if (crlf_state == -1) {
        goto out;
      }
      crlf_state = -1;
      break;

    case '\r':
      /* hold the first '\r' back; a second consecutive one emits a '\r' */
      if (crlf_state == 1) {
        token.append('\r');
      } else {
        crlf_state = 1;
      }
      break;

    case ' ': case '\t':
      if (crlf_state == -1) {
        if (state == 3) {
          /* continuing from the previous line */
          state = 4;
        } else {
          /* simply skipping this new line */
          state = 5;
        }
      } else {
        if (crlf_state == 1) {
          token.append('\r');
        }
        /* whitespace is kept only inside a name or a value */
        if (state == 1 || state == 3) {
          token.append(*ps);
        }
      }
      crlf_state = 0;
      break;

    default:
      switch (state) {
      case 0:
        /* first character of a field name */
        token.clear();
        state = 1;
        break;

      case 2:
        /* first character of the value on the same line as the ':' */
        if (crlf_state != -1) {
          token.clear();
          state = 3;
          break;
        }
        /* break is missing intentionally */
      case 3:
        /* an ordinary character at the start of a new line ends the
         * previous field and starts the next field name */
        if (crlf_state == -1) {
          fld_val = token.detach();
          if (!fld_name.empty() && !fld_val.empty()) {
            /* FIXME: some locale free implementation is
             * really required here,,, */
            ht.set(HHVM_FN(strtoupper)(fld_name), fld_val);
          }
          state = 1;
        }
        break;

      case 4:
        /* the leading whitespace of a continuation line becomes one space */
        token.append(' ');
        state = 3;
        break;
      }

      if (crlf_state == 1) {
        token.append('\r');
      }

      token.append(*ps);

      crlf_state = 0;
      break;
    }
    ps++, icnt--;
  }
out:
  /* flush the field that was still open when parsing stopped; a name with
   * no value text yet (state 2) is flushed with an empty token */
  if (state == 2) {
    token.clear();
    state = 3;
  }
  if (state == 3) {
    fld_val = token.detach();
    if (!fld_name.empty() && !fld_val.empty()) {
      /* FIXME: some locale free implementation is
       * really required here,,, */
      ht.set(HHVM_FN(strtoupper)(fld_name), fld_val);
    }
  }
  return state;
}

/*
 * Pipes one message to "/usr/sbin/sendmail -t -i" through popen().
 *
 * to, subject, message  written as "To:" / "Subject:" lines and the body.
 * headers               optional extra header text, written verbatim after
 *                       the subject line when non-null.
 * extra_cmd             optional text appended to the sendmail command line
 *                       after a single space when non-null.
 *
 * Returns 1 on success and 0 when the pipe cannot be opened, errno reports
 * EACCES after popen(), or pclose() returns a status other than the accepted
 * ones (EX_OK / EX_TEMPFAIL when those macros exist, 0 otherwise).
 */
static int php_mail(const char *to, const char *subject, const char *message,
                    const char *headers, const char *extra_cmd) {
  const char *sendmail_path = "/usr/sbin/sendmail -t -i";
  String sendmail_cmd = sendmail_path;
  if (extra_cmd != nullptr) {
    sendmail_cmd += " ";
    sendmail_cmd += extra_cmd;
  }

  /* Since popen() doesn't indicate if the internal fork() doesn't work
   * (e.g. the shell can't be executed) we explicitly set it to 0 to be
   * sure we don't catch any older errno value. */
  errno = 0;
  FILE *sendmail = popen(sendmail_cmd.data(), "w");
  if (sendmail == nullptr) {
    raise_warning("Could not execute mail delivery program '%s'",
                  sendmail_path);
    return 0;
  }

  if (EACCES == errno) {
    raise_warning("Permission denied: unable to execute shell to run "
                  "mail delivery binary '%s'", sendmail_path);
    pclose(sendmail);
    return 0;
  }

  /* header block, a blank line, then the body */
  fprintf(sendmail, "To: %s\n", to);
  fprintf(sendmail, "Subject: %s\n", subject);
  if (headers != nullptr) {
    fprintf(sendmail, "%s\n", headers);
  }
  fprintf(sendmail, "\n%s\n", message);
  int ret = pclose(sendmail);

  /* the accepted status values depend on which EX_* macros are defined */
#if defined(EX_TEMPFAIL)
  if ((ret != EX_OK) && (ret != EX_TEMPFAIL)) return 0;
#elif defined(EX_OK)
  if (ret != EX_OK) return 0;
#else
  if (ret != 0) return 0;
#endif

  return 1;
}

/*
 * mb_send_mail(): encodes subject and body with libmbfl and hands the
 * message to php_mail().
 *
 * to             recipient; trailing whitespace is stripped and control
 *                characters are replaced by spaces, except for folded-header
 *                separators.  An empty value raises a warning and makes the
 *                call fail.
 * subject        MIME-header-encoded with the transfer charset and header
 *                encoding; a null subject raises a warning and makes the
 *                call fail.
 * message        converted to the transfer charset and then to the body
 *                encoding; an empty message only raises a warning.
 * opt_headers    extra headers.  A Content-Type charset parameter and a
 *                Content-Transfer-Encoding found in them override the
 *                language defaults.
 * opt_extra_cmd  extra sendmail arguments, shell-escaped before use.
 *
 * Returns true only when no required field was missing and php_mail()
 * reports success.
 */
bool HHVM_FUNCTION(mb_send_mail,
                   const String& to,
                   const String& subject,
                   const String& message,
                   const Variant& opt_headers,
                   const Variant& opt_extra_cmd) {
  const String headers = convertArg(opt_headers);
  const String extra_cmd = convertArg(opt_extra_cmd);

  /* initialize */
  /* automatically growing buffer for the additional headers */
  mbfl_memory_device device;
  mbfl_memory_device_init(&device, 0, 0);
  mbfl_string orig_str, conv_str;
  mbfl_string_init(&orig_str);
  mbfl_string_init(&conv_str);

  /* character-set, transfer-encoding */
  mbfl_no_encoding
    tran_cs,  /* transfer text charset */
    head_enc, /* header transfer encoding */
    body_enc; /* body transfer encoding */
  /* defaults, replaced by the current language's settings when available */
  tran_cs = mbfl_no_encoding_utf8;
  head_enc = mbfl_no_encoding_base64;
  body_enc = mbfl_no_encoding_base64;
  const mbfl_language *lang = mbfl_no2language(MBSTRG(current_language));
  if (lang != nullptr) {
    tran_cs = lang->mail_charset;
    head_enc = lang->mail_header_encoding;
    body_enc = lang->mail_body_encoding;
  }

  Array ht_headers = Array::CreateDict();
  if (!headers.empty()) {
    _php_mbstr_parse_mail_headers(ht_headers, headers.data(), headers.size());
  }

  /* flags for the generated headers that must not be emitted again */
  struct {
    unsigned int cnt_type:1;
    unsigned int cnt_trans_enc:1;
  } suppressed_hdrs = { 0, 0 };

  static const StaticString s_CONTENT_TYPE("CONTENT-TYPE");
  String s = ht_headers[s_CONTENT_TYPE].toString();
  /* NOTE(review): this branch also runs when the header is absent unless
   * toString() yields a null String for a missing key -- not visible from
   * here; confirm, because the branch unconditionally suppresses the
   * generated Content-Type header. */
  if (!s.isNull()) {
    char *tmp;
    char *param_name;
    char *charset = nullptr;

    /* look at the first parameter after ';'; strtok_r() tokenises the
     * string's buffer in place */
    char *p = const_cast<char*>(strchr(s.data(), ';'));
    if (p != nullptr) {
      /* skipping the padded spaces */
      do {
        ++p;
      } while (*p == ' ' || *p == '\t');

      if (*p != '\0') {
        if ((param_name = strtok_r(p, "= ", &tmp)) != nullptr) {
          if (strcasecmp(param_name, "charset") == 0) {
            mbfl_no_encoding _tran_cs = tran_cs;

            charset = strtok_r(nullptr, "= ", &tmp);
            if (charset != nullptr) {
              _tran_cs = mbfl_name2no_encoding(charset);
            }

            if (_tran_cs == mbfl_no_encoding_invalid) {
              raise_warning("Unsupported charset \"%s\" - "
                            "will be regarded as ascii", charset);
              _tran_cs = mbfl_no_encoding_ascii;
            }
            tran_cs = _tran_cs;
          }
        }
      }
    }
    suppressed_hdrs.cnt_type = 1;
  }

  static const StaticString
         s_CONTENT_TRANSFER_ENCODING("CONTENT-TRANSFER-ENCODING");
  s = ht_headers[s_CONTENT_TRANSFER_ENCODING].toString();
  /* NOTE(review): same question as for Content-Type above. */
  if (!s.isNull()) {
    mbfl_no_encoding _body_enc = mbfl_name2no_encoding(s.data());
    switch (_body_enc) {
    case mbfl_no_encoding_base64:
    case mbfl_no_encoding_7bit:
    case mbfl_no_encoding_8bit:
      body_enc = _body_enc;
      break;

    default:
      raise_warning("Unsupported transfer encoding \"%s\" - "
                    "will be regarded as 8bit", s.data());
      body_enc = mbfl_no_encoding_8bit;
      break;
    }
    suppressed_hdrs.cnt_trans_enc = 1;
  }

  /* To: */
  char *to_r = nullptr;
  int err = 0;
  /* strlen() rather than to.size(): the length stops at the first NUL */
  if (auto to_len = strlen(to.data())) { // not to.size()
    to_r = req::strndup(to.data(), to_len);
    /* strip trailing whitespace */
    for (; to_len; to_len--) {
      if (!isspace((unsigned char)to_r[to_len - 1])) {
        break;
      }
      to_r[to_len - 1] = '\0';
    }
    for (size_t i = 0; to_r[i]; i++) {
      if (iscntrl((unsigned char)to_r[i])) {
        /**
         * According to RFC 822, section 3.1.1 long headers may be
         * separated into parts using CRLF followed at least one
         * linear-white-space character ('\t' or ' ').
         * To prevent these separators from being replaced with a space,
         * we use the SKIP_LONG_HEADER_SEP_MBSTRING to skip over them.
         */
        SKIP_LONG_HEADER_SEP_MBSTRING(to_r, i);
        to_r[i] = ' ';
      }
    }
  } else {
    raise_warning("Missing To: field");
    err = 1;
  }

  /* Subject: */
  String encoded_subject;
  if (!subject.isNull()) {
    orig_str.no_language = MBSTRG(current_language);
    orig_str.val = (unsigned char *)subject.data();
    orig_str.len = subject.size();
    orig_str.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
    /* when the internal encoding is invalid or "pass", detect the encoding
     * of the text from the current detection order instead */
    if (orig_str.no_encoding == mbfl_no_encoding_invalid
        || orig_str.no_encoding == mbfl_no_encoding_pass) {
      mbfl_encoding *encoding =
        (mbfl_encoding*) mbfl_identify_encoding2(&orig_str,
          (const mbfl_encoding**) MBSTRG(current_detect_order_list),
          MBSTRG(current_detect_order_list_size),
          MBSTRG(strict_detection));
      orig_str.no_encoding = encoding != nullptr
                           ? encoding->no_encoding
                           : mbfl_no_encoding_invalid;
    }
    /* the last argument is the size of a sample "Subject: ..." prefix */
    mbfl_string *pstr = mbfl_mime_header_encode
      (&orig_str, &conv_str, tran_cs, head_enc,
       "\n", sizeof("Subject: [PHP-jp nnnnnnnn]"));
    if (pstr != nullptr) {
      /* AttachString: the String is constructed directly from the
       * libmbfl-allocated buffer */
      encoded_subject = String(reinterpret_cast<char*>(pstr->val),
                               pstr->len,
                               AttachString);
    }
  } else {
    raise_warning("Missing Subject: field");
    err = 1;
  }

  /* message body */
  String encoded_message;
  if (!message.empty()) {
    orig_str.no_language = MBSTRG(current_language);
    orig_str.val = (unsigned char*)message.data();
    orig_str.len = message.size();
    orig_str.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;

    /* same encoding detection fallback as for the subject */
    if (orig_str.no_encoding == mbfl_no_encoding_invalid
        || orig_str.no_encoding == mbfl_no_encoding_pass) {
      mbfl_encoding *encoding =
        (mbfl_encoding*) mbfl_identify_encoding2(&orig_str,
          (const mbfl_encoding**) MBSTRG(current_detect_order_list),
          MBSTRG(current_detect_order_list_size),
          MBSTRG(strict_detection));
      orig_str.no_encoding = encoding != nullptr
                           ? encoding->no_encoding
                           : mbfl_no_encoding_invalid;
    }

    /* two-step conversion: first to the transfer charset, then (treating
     * that result as 8bit data) to the body transfer encoding */
    mbfl_string *pstr = nullptr;
    {
      mbfl_string tmpstr;
      if (mbfl_convert_encoding(&orig_str, &tmpstr, tran_cs) != nullptr) {
        tmpstr.no_encoding = mbfl_no_encoding_8bit;
        pstr = mbfl_convert_encoding(&tmpstr, &conv_str, body_enc);
        free(tmpstr.val);
      }
    }
    if (pstr != nullptr) {
      encoded_message = String(reinterpret_cast<char*>(pstr->val),
                               pstr->len,
                               AttachString);
    }
  } else {
    /* this is not really an error, so it is allowed. */
    raise_warning("Empty message body");
  }

  /* other headers */
#define PHP_MBSTR_MAIL_MIME_HEADER1 "Mime-Version: 1.0"
#define PHP_MBSTR_MAIL_MIME_HEADER2 "Content-Type: text/plain"
#define PHP_MBSTR_MAIL_MIME_HEADER3 "; charset="
#define PHP_MBSTR_MAIL_MIME_HEADER4 "Content-Transfer-Encoding: "
  /* caller-supplied headers go first, terminated by a newline if needed */
  if (!headers.empty()) {
    const char *p = headers.data();
    int n = headers.size();
    mbfl_memory_device_strncat(&device, p, n);
    if (n > 0 && p[n - 1] != '\n') {
      mbfl_memory_device_strncat(&device, "\n", 1);
    }
  }

  mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER1,
                             sizeof(PHP_MBSTR_MAIL_MIME_HEADER1) - 1);
  mbfl_memory_device_strncat(&device, "\n", 1);

  /* generated Content-Type, unless flagged as suppressed above */
  if (!suppressed_hdrs.cnt_type) {
    mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER2,
                               sizeof(PHP_MBSTR_MAIL_MIME_HEADER2) - 1);

    char *p = (char *)mbfl_no2preferred_mime_name(tran_cs);
    if (p != nullptr) {
      mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER3,
                                 sizeof(PHP_MBSTR_MAIL_MIME_HEADER3) - 1);
      mbfl_memory_device_strcat(&device, p);
    }
    mbfl_memory_device_strncat(&device, "\n", 1);
  }
  /* generated Content-Transfer-Encoding, unless flagged as suppressed */
  if (!suppressed_hdrs.cnt_trans_enc) {
    mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER4,
                               sizeof(PHP_MBSTR_MAIL_MIME_HEADER4) - 1);

    const char *p = (char *)mbfl_no2preferred_mime_name(body_enc);
    if (p == nullptr) {
      p = "7bit";
    }
    mbfl_memory_device_strcat(&device, p);
    mbfl_memory_device_strncat(&device, "\n", 1);
  }

  /* NOTE(review): presumably removes the final "\n" appended above so the
   * header block has no trailing newline -- confirm against
   * mbfl_memory_device_unput(); the buffer is then NUL-terminated */
  mbfl_memory_device_unput(&device);
  mbfl_memory_device_output('\0', &device);

  char *all_headers = (char *)device.buffer;

  /* escape the extra arguments before they reach the popen() command line */
  String cmd = string_escape_shell_cmd(extra_cmd.c_str());
  bool ret = (!err && php_mail(to_r, encoded_subject.data(),
                               encoded_message.data(),
                               all_headers, cmd.data()));
  mbfl_memory_device_clear(&device);
  req::free(to_r);
  return ret;
}

/*
 * Extension descriptor for "mbstring": binds the ini settings and registers
 * the MB_* constants and the mb_* functions when the module is initialised.
 */
static struct mbstringExtension final : Extension {
  mbstringExtension() : Extension("mbstring", NO_EXTENSION_VERSION_YET) {}

  void moduleInit() override {
    // TODO make these PHP_INI_ALL and thread local once we use them
    IniSetting::Bind(this, IniSetting::PHP_INI_SYSTEM, "mbstring.http_input",
                     &http_input);
    IniSetting::Bind(this, IniSetting::PHP_INI_SYSTEM, "mbstring.http_output",
                     &http_output);
    IniSetting::Bind(this, IniSetting::PHP_INI_ALL,
                     "mbstring.substitute_character",
                     &MBSTRG(current_filter_illegal_mode));

    /* bit flags */
    HHVM_RC_INT(MB_OVERLOAD_MAIL, 1);
    HHVM_RC_INT(MB_OVERLOAD_STRING, 2);
    HHVM_RC_INT(MB_OVERLOAD_REGEX, 4);
    /* case-conversion modes, taken from the php_unicode constants */
    HHVM_RC_INT(MB_CASE_UPPER, PHP_UNICODE_CASE_UPPER);
    HHVM_RC_INT(MB_CASE_LOWER, PHP_UNICODE_CASE_LOWER);
    HHVM_RC_INT(MB_CASE_TITLE, PHP_UNICODE_CASE_TITLE);

    HHVM_FE(mb_list_encodings);
    HHVM_FE(mb_list_encodings_alias_names);
    HHVM_FE(mb_list_mime_names);
    HHVM_FE(mb_check_encoding);
    HHVM_FE(mb_convert_case);
    HHVM_FE(mb_convert_encoding);
    HHVM_FE(mb_convert_kana);
    HHVM_FE(mb_convert_variables);
    HHVM_FE(mb_decode_mimeheader);
    HHVM_FE(mb_decode_numericentity);
    HHVM_FE(mb_detect_encoding);
    HHVM_FE(mb_detect_order);
    HHVM_FE(mb_encode_mimeheader);
    HHVM_FE(mb_encode_numericentity);
    HHVM_FE(mb_encoding_aliases);
    HHVM_FE(mb_ereg_match);
    HHVM_FE(mb_ereg_replace);
    HHVM_FE(mb_ereg_search_getpos);
    HHVM_FE(mb_ereg_search_getregs);
    HHVM_FE(mb_ereg_search_init);
    HHVM_FE(mb_ereg_search_pos);
    HHVM_FE(mb_ereg_search_regs);
    HHVM_FE(mb_ereg_search_setpos);
    HHVM_FE(mb_ereg_search);
    HHVM_FE(mb_ereg);
    HHVM_FE(mb_eregi_replace);
    HHVM_FE(mb_eregi);
    HHVM_FE(mb_get_info);
    HHVM_FE(mb_http_input);
    HHVM_FE(mb_http_output);
    HHVM_FE(mb_internal_encoding);
    HHVM_FE(mb_language);
    HHVM_FE(mb_output_handler);
    HHVM_FE(mb_parse_str);
    HHVM_FE(mb_preferred_mime_name);
    HHVM_FE(mb_regex_encoding);
    HHVM_FE(mb_regex_set_options);
    HHVM_FE(mb_send_mail);
    HHVM_FE(mb_split);
    HHVM_FE(mb_strcut);
    HHVM_FE(mb_strimwidth);
    HHVM_FE(mb_stripos);
    HHVM_FE(mb_stristr);
    HHVM_FE(mb_strlen);
    HHVM_FE(mb_strpos);
    HHVM_FE(mb_strrchr);
    HHVM_FE(mb_strrichr);
    HHVM_FE(mb_strripos);
    HHVM_FE(mb_strrpos);
    HHVM_FE(mb_strstr);
    HHVM_FE(mb_strtolower);
    HHVM_FE(mb_strtoupper);
    HHVM_FE(mb_strwidth);
    HHVM_FE(mb_substitute_character);
    HHVM_FE(mb_substr_count);
    HHVM_FE(mb_substr);

    loadSystemlib();
  }

  /* storage for the ini settings bound in moduleInit() */
  static std::string http_input;
  static std::string http_output;
  /* declared only: not bound or defined anywhere in the code shown here */
  static std::string substitute_character;
} s_mbstring_extension;

/* both settings start out as "pass" */
std::string mbstringExtension::http_input = "pass";
std::string mbstringExtension::http_output = "pass";

///////////////////////////////////////////////////////////////////////////////
}