hphp/runtime/ext/iconv/ext_iconv.cpp (1,686 lines of code) (raw):

/* +----------------------------------------------------------------------+ | HipHop for PHP | +----------------------------------------------------------------------+ | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) | | Copyright (c) 1997-2010 The PHP Group | +----------------------------------------------------------------------+ | This source file is subject to version 3.01 of the PHP license, | | that is bundled with this package in the file LICENSE, and is | | available through the world-wide-web at the following url: | | http://www.php.net/license/3_01.txt | | If you did not receive a copy of the PHP license and are unable to | | obtain it through the world-wide-web, please send a note to | | license@php.net so we can mail you a copy immediately. | +----------------------------------------------------------------------+ */ #include "hphp/runtime/base/array-init.h" #include "hphp/runtime/ext/extension.h" #include "hphp/runtime/base/builtin-functions.h" #include "hphp/runtime/base/comparisons.h" #include "hphp/runtime/base/execution-context.h" #include "hphp/runtime/base/runtime-error.h" #include "hphp/runtime/base/string-buffer.h" #include "hphp/runtime/base/zend-functions.h" #include "hphp/runtime/base/zend-string.h" #include "hphp/runtime/base/request-event-handler.h" #include "hphp/util/rds-local.h" #include <folly/lang/Assume.h> #include <boost/algorithm/string/predicate.hpp> #define ICONV_SUPPORTS_ERRNO 1 #include <iconv.h> /** * libiconv sometimes defines the second parameter of its * main function as (char**), and sometimes as (const char**) but * provides no means to detect this. Let build system determine * which is appropriate and (optionally) define ICONV_CONST=const * if such is needed in the cast. */ #ifndef ICONV_CONST # define ICONV_CONST #endif #ifndef INT32_MAX #define INT32_MAX (2147483647) #endif namespace HPHP { /////////////////////////////////////////////////////////////////////////////// static char _generic_superset_name[] = "UCS-4LE"; #define GENERIC_SUPERSET_NAME _generic_superset_name #define GENERIC_SUPERSET_NBYTES 4 #define PHP_ICONV_MIME_DECODE_STRICT (1<<0) #define PHP_ICONV_MIME_DECODE_CONTINUE_ON_ERROR (1<<1) typedef enum _php_iconv_enc_scheme_t { PHP_ICONV_ENC_SCHEME_BASE64, PHP_ICONV_ENC_SCHEME_QPRINT } php_iconv_enc_scheme_t; typedef enum _php_iconv_err_t { PHP_ICONV_ERR_SUCCESS = 0, PHP_ICONV_ERR_CONVERTER = 1, PHP_ICONV_ERR_WRONG_CHARSET = 2, PHP_ICONV_ERR_TOO_BIG = 3, PHP_ICONV_ERR_ILLEGAL_SEQ = 4, PHP_ICONV_ERR_ILLEGAL_CHAR = 5, PHP_ICONV_ERR_UNKNOWN = 6, PHP_ICONV_ERR_MALFORMED = 7, PHP_ICONV_ERR_ALLOC = 8 } php_iconv_err_t; static void _php_iconv_show_error(const char *func, php_iconv_err_t &err, const char *out_charset, const char *in_charset) { if (!strncmp(func, "libiconv", 8)) func += 3; switch (err) { case PHP_ICONV_ERR_SUCCESS: break; case PHP_ICONV_ERR_CONVERTER: raise_notice("%s(): Cannot open converter", func); break; case PHP_ICONV_ERR_WRONG_CHARSET: raise_notice("%s(): Wrong charset, " "conversion from `%s' to `%s' is not allowed", func, in_charset, out_charset); break; case PHP_ICONV_ERR_ILLEGAL_CHAR: raise_notice("%s(): Detected an incomplete multibyte character " "in input string", func); break; case PHP_ICONV_ERR_ILLEGAL_SEQ: if (boost::ends_with(out_charset, "//IGNORE")) { err = PHP_ICONV_ERR_SUCCESS; break; } raise_notice("%s(): Detected an illegal character in input string", func); break; case PHP_ICONV_ERR_TOO_BIG: // should not happen raise_warning("%s(): Buffer length exceeded", func); break; case PHP_ICONV_ERR_MALFORMED: raise_warning("%s(): Malformed string", func); break; default: // other error raise_notice("%s(): Unknown error (%d)", func, errno); break; } } const StaticString s_ISO_8859_1("ISO-8859-1"); struct ICONVGlobals final : RequestEventHandler { String input_encoding; String output_encoding; String internal_encoding; ICONVGlobals() {} void requestInit() override { input_encoding = s_ISO_8859_1; output_encoding = s_ISO_8859_1; internal_encoding = s_ISO_8859_1; } void requestShutdown() override { input_encoding.reset(); output_encoding.reset(); internal_encoding.reset(); } }; IMPLEMENT_STATIC_REQUEST_LOCAL(ICONVGlobals, s_iconv_globals); #define ICONVG(name) s_iconv_globals->name /////////////////////////////////////////////////////////////////////////////// // helpers namespace { /* * input string is in the form * charset//options * where the //options part is optional. * * we've already checked that charset needs to be replaced by rep, * so now check if we can just return rep as is, or if we need to * graft the options back onto it. */ const char* munge_one(const char* chs, char** tofree, const char* rep, size_t replen, size_t len) { if (!chs[len]) return rep; if (chs[len] != '/' || chs[len + 1] != '/') return chs; auto chslen = strlen(chs); auto charset_len = chslen - len; /* Avoid warnings about reading beyond array bounds, by indicating * to the compiler the relative sizes of the passed in strings. */ folly::assume(len + 2 <= chslen); *tofree = static_cast<char*>(req::malloc_noptrs(replen + charset_len + 1)); memcpy(*tofree, rep, replen); memcpy(*tofree + replen, chs + len, charset_len + 1); return *tofree; } /* * libiconv disagrees with glibc iconv on some charsets. Performing * these substitutions gets libiconv closer to the glibc behavior. * Its not clear that glibc's behavior is better (in fact for uCs-2 * its definitely worse); but it avoids unexpected changes in * behavior. */ const char* munge_charset(const char* chs, char** tofree) { #define MUNGE_ONE(name, repl) do { \ if (!strncasecmp(name, chs, strlen(name))) { \ return munge_one(chs, tofree, repl, strlen(repl), strlen(name)); \ } } while (0) MUNGE_ONE("BIG5", "CP950"); MUNGE_ONE("uCs-2", "uCs-2le"); MUNGE_ONE("Unicode", "UTF-16"); #undef MUNGE_ONE return chs; } iconv_t iconv_open_helper(const char* out, const char* in) { char* tofree1 = nullptr; in = munge_charset(in, &tofree1); char* tofree2 = nullptr; out = munge_charset(out, &tofree2); auto ret = iconv_open(out, in); req::free(tofree1); req::free(tofree2); return ret; } } #ifndef ICONV_CSNMAXLEN #define ICONV_CSNMAXLEN 64 #endif static bool validate_charset(const String& charset) { if (charset.size() >= ICONV_CSNMAXLEN) { raise_invalid_argument_warning ("Charset parameter exceeds the maximum allowed " "length of %d characters", ICONV_CSNMAXLEN); return false; } return true; } static Variant check_charset(const Variant& charset) { String charset_str = charset.isNull() ? null_string : charset.toString(); if (charset_str.empty()) { return ICONVG(internal_encoding); } if (!validate_charset(charset_str)) return false; return charset; } static php_iconv_err_t _php_iconv_appendl(StringBuffer &d, const char *s, size_t l, iconv_t cd) { const char *in_p = s; size_t in_left = l; size_t out_left = 0; size_t buf_growth = 128; char *out_p; #if !ICONV_SUPPORTS_ERRNO size_t prev_in_left = in_left; #endif if (in_p != NULL) { while (in_left > 0) { out_left = buf_growth; out_p = d.appendCursor(out_left); if (iconv(cd, (ICONV_CONST char **)&in_p, &in_left, (char **)&out_p, &out_left) == (size_t)-1) { #if ICONV_SUPPORTS_ERRNO switch (errno) { case EINVAL: return PHP_ICONV_ERR_ILLEGAL_CHAR; case EILSEQ: return PHP_ICONV_ERR_ILLEGAL_SEQ; case E2BIG: break; default: return PHP_ICONV_ERR_UNKNOWN; } #else if (prev_in_left == in_left) { return PHP_ICONV_ERR_UNKNOWN; } #endif } #if !ICONV_SUPPORTS_ERRNO prev_in_left = in_left; #endif d.resize(d.size() + buf_growth - out_left); buf_growth <<= 1; } } else { for (;;) { out_left = buf_growth; out_p = d.appendCursor(out_left); if (iconv(cd, NULL, NULL, (char **)&out_p, &out_left) == (size_t)0) { d.resize(d.size() + buf_growth - out_left); break; } else { #if ICONV_SUPPORTS_ERRNO if (errno != E2BIG) { return PHP_ICONV_ERR_UNKNOWN; } #else if (out_left != 0) { return PHP_ICONV_ERR_UNKNOWN; } #endif } d.resize(d.size() + buf_growth - out_left); buf_growth <<= 1; } } return PHP_ICONV_ERR_SUCCESS; } static php_iconv_err_t _php_iconv_appendc(StringBuffer &d, const char c, iconv_t cd) { return _php_iconv_appendl(d, &c, 1, cd); } static php_iconv_err_t php_iconv_string(const char *in_p, size_t in_len, char **out, size_t *out_len, const char *out_charset, const char *in_charset) { #if !ICONV_SUPPORTS_ERRNO size_t in_size, out_size, out_left; char *out_buffer, *out_p; iconv_t cd; size_t result; *out = NULL; *out_len = 0; /** * This is not the right way to get output size... * This is not space efficient for large text. * This is also problem for encoding like UTF-7/UTF-8/ISO-2022 which * a single char can be more than 4 bytes. * I added 15 extra bytes for safety. <yohgaki@php.net> */ out_size = in_len * sizeof(int) + 15; out_left = out_size; in_size = in_len; cd = iconv_open_helper(out_charset, in_charset); if (cd == (iconv_t)(-1)) { return PHP_ICONV_ERR_UNKNOWN; } out_buffer = (char*)req::malloc_noptrs(out_size + 1); out_p = out_buffer; SCOPE_EXIT { if (*out == nullptr) req::free(out_buffer); }; result = iconv(cd, (char **)&in_p, &in_size, (char **)&out_p, &out_left); if (result == (size_t)(-1)) { return PHP_ICONV_ERR_UNKNOWN; } if (out_left < 8) { out_buffer = (char*)req::realloc_noptrs(out_buffer, out_size + 8); } // flush the shift-out sequences result = iconv(cd, NULL, NULL, &out_p, &out_left); if (result == (size_t)(-1)) { return PHP_ICONV_ERR_UNKNOWN; } *out_len = out_size - out_left; out_buffer[*out_len] = '\0'; *out = out_buffer; iconv_close(cd); return PHP_ICONV_ERR_SUCCESS; #else // iconv supports errno. Handle it better way. iconv_t cd; size_t in_left, out_size, out_left; char *out_p, *out_buf; size_t bsz, result = 0; php_iconv_err_t retval = PHP_ICONV_ERR_SUCCESS; int ignore_ilseq = boost::ends_with(out_charset, "//IGNORE") || boost::ends_with(out_charset, "//IGNORE//TRANSLIT"); *out = NULL; *out_len = 0; cd = iconv_open_helper(out_charset, in_charset); if (cd == (iconv_t)(-1)) { if (errno == EINVAL) { return PHP_ICONV_ERR_WRONG_CHARSET; } else { return PHP_ICONV_ERR_CONVERTER; } } in_left= in_len; out_left = in_len + 32; // Avoid realloc() most cases out_size = 0; bsz = out_left; out_buf = (char *)req::malloc_noptrs(bsz + 1); out_p = out_buf; SCOPE_EXIT { if (*out == nullptr) req::free(out_buf); }; while (in_left > 0) { result = iconv(cd, (ICONV_CONST char **)&in_p, &in_left, (char **)&out_p, &out_left); out_size = bsz - out_left; if (result == (size_t)(-1)) { if (ignore_ilseq && errno == EILSEQ) { if (in_left <= 1) { result = 0; } else { errno = 0; in_p++; in_left--; continue; } } if (errno == E2BIG && in_left > 0) { // converted string is longer than out buffer bsz += in_len; out_buf = (char*)req::realloc_noptrs(out_buf, bsz + 1); out_p = out_buf; out_p += out_size; out_left = bsz - out_size; continue; } } break; } if (result != (size_t)(-1)) { // flush the shift-out sequences for (;;) { result = iconv(cd, NULL, NULL, (char **)&out_p, &out_left); out_size = bsz - out_left; if (result != (size_t)(-1)) { break; } if (errno == E2BIG) { bsz += 16; out_buf = (char *)req::realloc_noptrs(out_buf, bsz); out_p = out_buf; out_p += out_size; out_left = bsz - out_size; } else { break; } } } iconv_close(cd); if (result == (size_t)(-1)) { switch (errno) { case EINVAL: retval = PHP_ICONV_ERR_ILLEGAL_CHAR; break; case EILSEQ: retval = PHP_ICONV_ERR_ILLEGAL_SEQ; break; case E2BIG: // should not happen retval = PHP_ICONV_ERR_TOO_BIG; break; default: // other error retval = PHP_ICONV_ERR_UNKNOWN; return PHP_ICONV_ERR_UNKNOWN; } } *out_p = '\0'; *out = out_buf; *out_len = out_size; return retval; #endif } static php_iconv_err_t _php_iconv_strlen(unsigned int *pretval, const char *str, size_t nbytes, const char *enc) { char buf[GENERIC_SUPERSET_NBYTES*2]; php_iconv_err_t err = PHP_ICONV_ERR_SUCCESS; iconv_t cd; const char *in_p; size_t in_left; char *out_p; size_t out_left; unsigned int cnt; *pretval = (unsigned int)-1; cd = iconv_open_helper(GENERIC_SUPERSET_NAME, enc); if (cd == (iconv_t)(-1)) { #if ICONV_SUPPORTS_ERRNO if (errno == EINVAL) { return PHP_ICONV_ERR_WRONG_CHARSET; } else { return PHP_ICONV_ERR_CONVERTER; } #else return PHP_ICONV_ERR_UNKNOWN; #endif } errno = out_left = 0; for (in_p = str, in_left = nbytes, cnt = 0; in_left > 0; cnt+=2) { size_t prev_in_left; out_p = buf; out_left = sizeof(buf); prev_in_left = in_left; if (iconv(cd, (ICONV_CONST char **)&in_p, &in_left, (char **) &out_p, &out_left) == (size_t)-1) { if (prev_in_left == in_left) { break; } } } if (out_left > 0) { cnt -= out_left / GENERIC_SUPERSET_NBYTES; } #if ICONV_SUPPORTS_ERRNO switch (errno) { case EINVAL: err = PHP_ICONV_ERR_ILLEGAL_CHAR; break; case EILSEQ: err = PHP_ICONV_ERR_ILLEGAL_SEQ; break; case E2BIG: case 0: *pretval = cnt; break; default: err = PHP_ICONV_ERR_UNKNOWN; break; } #else *pretval = cnt; #endif iconv_close(cd); return err; } static php_iconv_err_t _php_iconv_substr(StringBuffer &pretval, const char *str, size_t nbytes, int offset, int len, const char *enc) { char buf[GENERIC_SUPERSET_NBYTES]; php_iconv_err_t err = PHP_ICONV_ERR_SUCCESS; iconv_t cd1, cd2; const char *in_p; size_t in_left; char *out_p; size_t out_left; unsigned int cnt; unsigned int total_len; err = _php_iconv_strlen(&total_len, str, nbytes, enc); if (err != PHP_ICONV_ERR_SUCCESS) { return err; } if (len < 0) { if ((len += (total_len - offset)) < 0) { return PHP_ICONV_ERR_SUCCESS; } } if (offset < 0) { if ((offset += total_len) < 0) { return PHP_ICONV_ERR_SUCCESS; } } if (len > (int)total_len) { len = total_len; } if (offset >= (int)total_len) { return PHP_ICONV_ERR_SUCCESS; } if ((offset + len) > (int)total_len ) { /* trying to compute the length */ len = total_len - offset; } if (len == 0) { return PHP_ICONV_ERR_SUCCESS; } cd1 = iconv_open_helper(GENERIC_SUPERSET_NAME, enc); if (cd1 == (iconv_t)(-1)) { #if ICONV_SUPPORTS_ERRNO if (errno == EINVAL) { return PHP_ICONV_ERR_WRONG_CHARSET; } else { return PHP_ICONV_ERR_CONVERTER; } #else return PHP_ICONV_ERR_UNKNOWN; #endif } cd2 = (iconv_t)NULL; errno = 0; for (in_p = str, in_left = nbytes, cnt = 0; in_left > 0 && len > 0; ++cnt) { size_t prev_in_left; out_p = buf; out_left = sizeof(buf); prev_in_left = in_left; if (iconv(cd1, (ICONV_CONST char **)&in_p, &in_left, (char **) &out_p, &out_left) == (size_t)-1) { if (prev_in_left == in_left) { break; } } if (cnt >= (unsigned int)offset) { if (cd2 == (iconv_t)NULL) { cd2 = iconv_open_helper(enc, GENERIC_SUPERSET_NAME); if (cd2 == (iconv_t)(-1)) { cd2 = (iconv_t)NULL; #if ICONV_SUPPORTS_ERRNO if (errno == EINVAL) { err = PHP_ICONV_ERR_WRONG_CHARSET; } else { err = PHP_ICONV_ERR_CONVERTER; } #else err = PHP_ICONV_ERR_UNKNOWN; #endif break; } } if (_php_iconv_appendl(pretval, buf, sizeof(buf), cd2) != PHP_ICONV_ERR_SUCCESS) { break; } --len; } } #if ICONV_SUPPORTS_ERRNO switch (errno) { case EINVAL: err = PHP_ICONV_ERR_ILLEGAL_CHAR; break; case EILSEQ: err = PHP_ICONV_ERR_ILLEGAL_SEQ; break; case E2BIG: break; } #endif if (err == PHP_ICONV_ERR_SUCCESS) { if (cd2 != (iconv_t)NULL) { _php_iconv_appendl(pretval, NULL, 0, cd2); } } if (cd1 != (iconv_t)NULL) { iconv_close(cd1); } if (cd2 != (iconv_t)NULL) { iconv_close(cd2); } return err; } static php_iconv_err_t _php_iconv_strpos(unsigned int *pretval, const char *haystk, size_t haystk_nbytes, const char *ndl, size_t ndl_nbytes, int offset, const char *enc) { #define _php_iconv_memequal(a, b, c) \ ((c) == sizeof(uint64_t) \ ? (a).buf_64 == *((uint64_t *)(b)) \ : ((c) == sizeof(uint32_t) \ ? (a).buf_32 == *((uint32_t *)(b)) \ : memcmp((a).buf, b, c) == 0)) union gsnb_t { char buf[GENERIC_SUPERSET_NBYTES]; uint32_t buf_32; uint64_t buf_64; } x; php_iconv_err_t err = PHP_ICONV_ERR_SUCCESS; iconv_t cd; const char *in_p; size_t in_left; char *out_p; size_t out_left; unsigned int cnt; char* ndl_buf = nullptr; const char *ndl_buf_p; size_t ndl_buf_len, ndl_buf_left; unsigned int match_ofs; *pretval = (unsigned int)-1; err = php_iconv_string(ndl, ndl_nbytes, &ndl_buf, &ndl_buf_len, GENERIC_SUPERSET_NAME, enc); SCOPE_EXIT { req::free(ndl_buf); }; if (err != PHP_ICONV_ERR_SUCCESS) { return err; } cd = iconv_open_helper(GENERIC_SUPERSET_NAME, enc); if (cd == (iconv_t)(-1)) { #if ICONV_SUPPORTS_ERRNO if (errno == EINVAL) { return PHP_ICONV_ERR_WRONG_CHARSET; } else { return PHP_ICONV_ERR_CONVERTER; } #else return PHP_ICONV_ERR_UNKNOWN; #endif } ndl_buf_p = ndl_buf; ndl_buf_left = ndl_buf_len; match_ofs = (unsigned int)-1; for (in_p = haystk, in_left = haystk_nbytes, cnt = 0; in_left > 0; ++cnt) { size_t prev_in_left; out_p = x.buf; out_left = sizeof(x.buf); prev_in_left = in_left; if (iconv(cd, (ICONV_CONST char **)&in_p, &in_left, (char **) &out_p, &out_left) == (size_t)-1) { if (prev_in_left == in_left) { #if ICONV_SUPPORTS_ERRNO switch (errno) { case EINVAL: err = PHP_ICONV_ERR_ILLEGAL_CHAR; break; case EILSEQ: err = PHP_ICONV_ERR_ILLEGAL_SEQ; break; case E2BIG: break; default: err = PHP_ICONV_ERR_UNKNOWN; break; } #endif break; } } if (offset >= 0) { if (cnt >= (unsigned int)offset) { if (_php_iconv_memequal(x, ndl_buf_p, sizeof(x.buf))) { if (match_ofs == (unsigned int)-1) { match_ofs = cnt; } ndl_buf_p += GENERIC_SUPERSET_NBYTES; ndl_buf_left -= GENERIC_SUPERSET_NBYTES; if (ndl_buf_left == 0) { *pretval = match_ofs; break; } } else { unsigned int i, j, lim; i = 0; j = GENERIC_SUPERSET_NBYTES; lim = (unsigned int)(ndl_buf_p - ndl_buf); while (j < lim) { if (_php_iconv_memequal(*(gsnb_t*)&ndl_buf[j], &ndl_buf[i], GENERIC_SUPERSET_NBYTES)) { i += GENERIC_SUPERSET_NBYTES; } else { j -= i; i = 0; } j += GENERIC_SUPERSET_NBYTES; } if (_php_iconv_memequal(x, &ndl_buf[i], sizeof(x.buf))) { match_ofs += (lim - i) / GENERIC_SUPERSET_NBYTES; i += GENERIC_SUPERSET_NBYTES; ndl_buf_p = &ndl_buf[i]; ndl_buf_left = ndl_buf_len - i; } else { match_ofs = (unsigned int)-1; ndl_buf_p = ndl_buf; ndl_buf_left = ndl_buf_len; } } } } else { if (_php_iconv_memequal(x, ndl_buf_p, sizeof(x.buf))) { if (match_ofs == (unsigned int)-1) { match_ofs = cnt; } ndl_buf_p += GENERIC_SUPERSET_NBYTES; ndl_buf_left -= GENERIC_SUPERSET_NBYTES; if (ndl_buf_left == 0) { *pretval = match_ofs; ndl_buf_p = ndl_buf; ndl_buf_left = ndl_buf_len; match_ofs = (unsigned int)-1; } } else { unsigned int i, j, lim; i = 0; j = GENERIC_SUPERSET_NBYTES; lim = (unsigned int)(ndl_buf_p - ndl_buf); while (j < lim) { if (_php_iconv_memequal(*(gsnb_t*)&ndl_buf[j], &ndl_buf[i], GENERIC_SUPERSET_NBYTES)) { i += GENERIC_SUPERSET_NBYTES; } else { j -= i; i = 0; } j += GENERIC_SUPERSET_NBYTES; } if (_php_iconv_memequal(x, &ndl_buf[i], sizeof(x.buf))) { match_ofs += (lim - i) / GENERIC_SUPERSET_NBYTES; i += GENERIC_SUPERSET_NBYTES; ndl_buf_p = &ndl_buf[i]; ndl_buf_left = ndl_buf_len - i; } else { match_ofs = (unsigned int)-1; ndl_buf_p = ndl_buf; ndl_buf_left = ndl_buf_len; } } } } iconv_close(cd); return err; } static php_iconv_err_t _php_iconv_mime_decode(StringBuffer &retval, const char *str, size_t str_nbytes, const char *enc, const char **next_pos, int mode) { php_iconv_err_t err = PHP_ICONV_ERR_SUCCESS; iconv_t cd = (iconv_t)(-1), cd_pl = (iconv_t)(-1); const char *p1; size_t str_left; unsigned int scan_stat = 0; const char *csname = NULL; size_t csname_len; const char *encoded_text = NULL; size_t encoded_text_len = 0; const char *encoded_word = NULL; const char *spaces = NULL; php_iconv_enc_scheme_t enc_scheme = PHP_ICONV_ENC_SCHEME_BASE64; if (next_pos != NULL) { *next_pos = NULL; } cd_pl = iconv_open_helper(enc, "ASCII"); if (cd_pl == (iconv_t)(-1)) { #if ICONV_SUPPORTS_ERRNO if (errno == EINVAL) { err = PHP_ICONV_ERR_WRONG_CHARSET; } else { err = PHP_ICONV_ERR_CONVERTER; } #else err = PHP_ICONV_ERR_UNKNOWN; #endif goto out; } p1 = str; for (str_left = str_nbytes; str_left > 0; str_left--, p1++) { int eos = 0; switch (scan_stat) { case 0: /* expecting any character */ switch (*p1) { case '\r': /* part of an EOL sequence? */ scan_stat = 7; break; case '\n': scan_stat = 8; break; case '=': /* first letter of an encoded chunk */ encoded_word = p1; scan_stat = 1; break; case ' ': case '\t': /* a chunk of whitespaces */ spaces = p1; scan_stat = 11; break; default: /* first letter of a non-encoded word */ _php_iconv_appendc(retval, *p1, cd_pl); encoded_word = NULL; if ((mode & PHP_ICONV_MIME_DECODE_STRICT)) { scan_stat = 12; } break; } break; case 1: /* expecting a delimiter */ if (*p1 != '?') { err = _php_iconv_appendl(retval, encoded_word, (size_t)((p1 + 1) - encoded_word), cd_pl); if (err != PHP_ICONV_ERR_SUCCESS) { goto out; } encoded_word = NULL; if ((mode & PHP_ICONV_MIME_DECODE_STRICT)) { scan_stat = 12; } else { scan_stat = 0; } break; } csname = p1 + 1; scan_stat = 2; break; case 2: /* expecting a charset name */ switch (*p1) { case '?': /* normal delimiter: encoding scheme follows */ scan_stat = 3; break; case '*': /* new style delimiter: locale id follows */ scan_stat = 10; break; } if (scan_stat != 2) { char tmpbuf[80]; if (csname == NULL) { err = PHP_ICONV_ERR_MALFORMED; goto out; } csname_len = (size_t)(p1 - csname); if (csname_len > sizeof(tmpbuf) - 1) { if ((mode & PHP_ICONV_MIME_DECODE_CONTINUE_ON_ERROR)) { err = _php_iconv_appendl(retval, encoded_word, (size_t)((p1 + 1) - encoded_word), cd_pl); if (err != PHP_ICONV_ERR_SUCCESS) { goto out; } encoded_word = NULL; if ((mode & PHP_ICONV_MIME_DECODE_STRICT)) { scan_stat = 12; } else { scan_stat = 0; } break; } else { err = PHP_ICONV_ERR_MALFORMED; goto out; } } memcpy(tmpbuf, csname, csname_len); tmpbuf[csname_len] = '\0'; if (cd != (iconv_t)(-1)) { iconv_close(cd); } cd = iconv_open_helper(enc, tmpbuf); if (cd == (iconv_t)(-1)) { if ((mode & PHP_ICONV_MIME_DECODE_CONTINUE_ON_ERROR)) { auto qmarks = 2; while (qmarks > 0 && str_left > 1) { p1++; qmarks -= *p1 == '?' ? 1 : 0 ; str_left--; } if (*(p1+1) == '=') { ++p1; --str_left; } err = _php_iconv_appendl(retval, encoded_word, (size_t)((p1 + 1) - encoded_word), cd_pl); if (err != PHP_ICONV_ERR_SUCCESS) { goto out; } scan_stat = 12; break; } else { #if ICONV_SUPPORTS_ERRNO if (errno == EINVAL) { err = PHP_ICONV_ERR_WRONG_CHARSET; } else { err = PHP_ICONV_ERR_CONVERTER; } #else err = PHP_ICONV_ERR_UNKNOWN; #endif goto out; } } } break; case 3: /* expecting a encoding scheme specifier */ switch (*p1) { case 'b': case 'B': enc_scheme = PHP_ICONV_ENC_SCHEME_BASE64; scan_stat = 4; break; case 'q': case 'Q': enc_scheme = PHP_ICONV_ENC_SCHEME_QPRINT; scan_stat = 4; break; default: if ((mode & PHP_ICONV_MIME_DECODE_CONTINUE_ON_ERROR)) { err = _php_iconv_appendl(retval, encoded_word, (size_t)((p1 + 1) - encoded_word), cd_pl); if (err != PHP_ICONV_ERR_SUCCESS) { goto out; } encoded_word = NULL; if ((mode & PHP_ICONV_MIME_DECODE_STRICT)) { scan_stat = 12; } else { scan_stat = 0; } break; } else { err = PHP_ICONV_ERR_MALFORMED; goto out; } } break; case 4: /* expecting a delimiter */ if (*p1 != '?') { if ((mode & PHP_ICONV_MIME_DECODE_CONTINUE_ON_ERROR)) { /* pass the entire chunk through the converter */ err = _php_iconv_appendl(retval, encoded_word, (size_t)((p1 + 1) - encoded_word), cd_pl); if (err != PHP_ICONV_ERR_SUCCESS) { goto out; } encoded_word = NULL; if ((mode & PHP_ICONV_MIME_DECODE_STRICT)) { scan_stat = 12; } else { scan_stat = 0; } break; } else { err = PHP_ICONV_ERR_MALFORMED; goto out; } } encoded_text = p1 + 1; scan_stat = 5; break; case 5: /* expecting an encoded portion */ if (*p1 == '?') { encoded_text_len = (size_t)(p1 - encoded_text); scan_stat = 6; } break; case 7: /* expecting a "\n" character */ if (*p1 == '\n') { scan_stat = 8; } else { /* bare CR */ _php_iconv_appendc(retval, '\r', cd_pl); _php_iconv_appendc(retval, *p1, cd_pl); scan_stat = 0; } break; case 8: /* checking whether the following line is part of a folded header */ if (*p1 != ' ' && *p1 != '\t') { --p1; str_left = 1; /* quit_loop */ break; } if (encoded_word == NULL) { _php_iconv_appendc(retval, ' ', cd_pl); } spaces = NULL; scan_stat = 11; break; case 6: /* expecting a End-Of-Chunk character "=" */ if (*p1 != '=') { if ((mode & PHP_ICONV_MIME_DECODE_CONTINUE_ON_ERROR)) { /* pass the entire chunk through the converter */ err = _php_iconv_appendl(retval, encoded_word, (size_t)((p1 + 1) - encoded_word), cd_pl); if (err != PHP_ICONV_ERR_SUCCESS) { goto out; } encoded_word = NULL; if ((mode & PHP_ICONV_MIME_DECODE_STRICT)) { scan_stat = 12; } else { scan_stat = 0; } break; } else { err = PHP_ICONV_ERR_MALFORMED; goto out; } } scan_stat = 9; if (str_left == 1) { eos = 1; } else { break; } case 9: /* choice point, seeing what to do next.*/ switch (*p1) { default: /* Handle non-RFC-compliant formats * * RFC2047 requires the character that comes right * after an encoded word (chunk) to be a whitespace, * while there are lots of broken implementations that * generate such malformed headers that don't fulfill * that requirement. */ if (!eos) { if ((mode & PHP_ICONV_MIME_DECODE_STRICT)) { /* pass the entire chunk through the converter */ err = _php_iconv_appendl(retval, encoded_word, (size_t)((p1 + 1) - encoded_word), cd_pl); if (err != PHP_ICONV_ERR_SUCCESS) { goto out; } scan_stat = 12; break; } } /* break is omitted intentionally */ case '\r': case '\n': case ' ': case '\t': { String decoded; switch (enc_scheme) { case PHP_ICONV_ENC_SCHEME_BASE64: { int len = encoded_text_len; decoded = string_base64_decode(encoded_text, len, false); } break; case PHP_ICONV_ENC_SCHEME_QPRINT: { int len = encoded_text_len; decoded = string_quoted_printable_decode(encoded_text, len, true); } break; default: break; } if (decoded.isNull()) { if ((mode & PHP_ICONV_MIME_DECODE_CONTINUE_ON_ERROR)) { /* pass the entire chunk through the converter */ err = _php_iconv_appendl(retval, encoded_word, (size_t)((p1 + 1) - encoded_word), cd_pl); if (err != PHP_ICONV_ERR_SUCCESS) { goto out; } encoded_word = NULL; if ((mode & PHP_ICONV_MIME_DECODE_STRICT)) { scan_stat = 12; } else { scan_stat = 0; } break; } else { err = PHP_ICONV_ERR_UNKNOWN; goto out; } } err = _php_iconv_appendl(retval, decoded.data(), decoded.size(), cd); if (err != PHP_ICONV_ERR_SUCCESS) { if ((mode & PHP_ICONV_MIME_DECODE_CONTINUE_ON_ERROR)) { /* pass the entire chunk through the converter */ err = _php_iconv_appendl(retval, encoded_word, (size_t)(p1 - encoded_word), cd_pl); encoded_word = nullptr; if (err != PHP_ICONV_ERR_SUCCESS) { break; } } else { goto out; } } if (eos) { /* reached end-of-string. done. */ scan_stat = 0; break; } switch (*p1) { case '\r': /* part of an EOL sequence? */ scan_stat = 7; break; case '\n': scan_stat = 8; break; case '=': /* first letter of an encoded chunk */ scan_stat = 1; break; case ' ': case '\t': /* medial whitespaces */ spaces = p1; scan_stat = 11; break; default: /* first letter of a non-encoded word */ _php_iconv_appendc(retval, *p1, cd_pl); scan_stat = 12; break; } } break; } break; case 10: /* expects a language specifier. dismiss it for now */ if (*p1 == '?') { scan_stat = 3; } break; case 11: /* expecting a chunk of whitespaces */ switch (*p1) { case '\r': /* part of an EOL sequence? */ scan_stat = 7; break; case '\n': scan_stat = 8; break; case '=': /* first letter of an encoded chunk */ if (spaces != NULL && encoded_word == NULL) { _php_iconv_appendl(retval, spaces, (size_t)(p1 - spaces), cd_pl); spaces = NULL; } encoded_word = p1; scan_stat = 1; break; case ' ': case '\t': break; default: /* first letter of a non-encoded word */ if (spaces != NULL) { _php_iconv_appendl(retval, spaces, (size_t)(p1 - spaces), cd_pl); spaces = NULL; } _php_iconv_appendc(retval, *p1, cd_pl); encoded_word = NULL; if ((mode & PHP_ICONV_MIME_DECODE_STRICT)) { scan_stat = 12; } else { scan_stat = 0; } break; } break; case 12: /* expecting a non-encoded word */ switch (*p1) { case '\r': /* part of an EOL sequence? */ scan_stat = 7; break; case '\n': scan_stat = 8; break; case ' ': case '\t': spaces = p1; scan_stat = 11; break; case '=': /* first letter of an encoded chunk */ if (!(mode & PHP_ICONV_MIME_DECODE_STRICT)) { encoded_word = p1; scan_stat = 1; break; } /* break is omitted intentionally */ default: _php_iconv_appendc(retval, *p1, cd_pl); break; } break; } } switch (scan_stat) { case 0: case 8: case 11: case 12: break; default: if ((mode & PHP_ICONV_MIME_DECODE_CONTINUE_ON_ERROR)) { if (scan_stat == 1) { _php_iconv_appendc(retval, '=', cd_pl); } err = PHP_ICONV_ERR_SUCCESS; } else { err = PHP_ICONV_ERR_MALFORMED; goto out; } } if (next_pos != NULL) { *next_pos = p1; } out: if (cd != (iconv_t)(-1)) { iconv_close(cd); } if (cd_pl != (iconv_t)(-1)) { iconv_close(cd_pl); } return err; } /////////////////////////////////////////////////////////////////////////////// const StaticString s_scheme("scheme"), s_input_charset("input-charset"), s_output_charset("output-charset"), s_line_length("line-length"), s_line_break_chars("line-break-chars"); static Variant HHVM_FUNCTION(iconv_mime_encode, const String& field_name, const String& field_value, const Variant& preferences /* = uninit_variant */) { php_iconv_enc_scheme_t scheme_id = PHP_ICONV_ENC_SCHEME_BASE64; String in_charset; String out_charset; long line_len = 76; String lfchars = "\r\n"; StringBuffer ret; char *buf = NULL; if (!preferences.isNull()) { Variant scheme = preferences.toArray()[s_scheme]; if (scheme.isString()) { String s = scheme.toString(); switch (*s.data()) { case 'B': case 'b': scheme_id = PHP_ICONV_ENC_SCHEME_BASE64; break; case 'Q': case 'q': scheme_id = PHP_ICONV_ENC_SCHEME_QPRINT; break; } } Variant input_charset = preferences.toArray()[s_input_charset]; if (input_charset.isString()) { in_charset = input_charset.toString(); if (!validate_charset(in_charset)) return false; } Variant output_charset = preferences.toArray()[s_output_charset]; if (output_charset.isString()) { out_charset = output_charset.toString(); if (!validate_charset(out_charset)) return false; } Variant line_length = preferences.toArray()[s_line_length]; if (!line_length.isNull()) { line_len = line_length.toInt64(); } Variant line_break_chars = preferences.toArray()[s_line_break_chars]; if (!line_break_chars.isNull()) { lfchars = line_break_chars.toString(); } } static int qp_table[256] = { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* 0x00 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* 0x10 */ 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x20 */ 1, 1, 1, 1, 1, 1, 1 ,1, 1, 1, 1, 1, 1, 3, 1, 3, /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, /* 0x70 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* 0x80 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* 0x90 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* 0xA0 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* 0xB0 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* 0xC0 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* 0xD0 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* 0xE0 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 /* 0xF0 */ }; php_iconv_err_t err = PHP_ICONV_ERR_SUCCESS; iconv_t cd = (iconv_t)(-1), cd_pl = (iconv_t)(-1); if ((field_name.size() + 2) >= line_len || (out_charset.size() + 12) >= line_len) { /* field name is too long */ err = PHP_ICONV_ERR_TOO_BIG; goto out; } cd_pl = iconv_open_helper("ASCII", in_charset.data()); if (cd_pl == (iconv_t)(-1)) { #if ICONV_SUPPORTS_ERRNO if (errno == EINVAL) { err = PHP_ICONV_ERR_WRONG_CHARSET; } else { err = PHP_ICONV_ERR_CONVERTER; } #else err = PHP_ICONV_ERR_UNKNOWN; #endif goto out; } cd = iconv_open_helper(out_charset.data(), in_charset.data()); if (cd == (iconv_t)(-1)) { #if ICONV_SUPPORTS_ERRNO if (errno == EINVAL) { err = PHP_ICONV_ERR_WRONG_CHARSET; } else { err = PHP_ICONV_ERR_CONVERTER; } #else err = PHP_ICONV_ERR_UNKNOWN; #endif goto out; } const char *in_p; size_t in_left; char *out_p; size_t out_left; buf = (char*)req::malloc_noptrs(line_len + 5); unsigned int char_cnt; char_cnt = line_len; _php_iconv_appendl(ret, field_name.data(), field_name.size(), cd_pl); char_cnt -= field_name.size(); ret.append(": "); char_cnt -= 2; in_p = field_value.data(); in_left = field_value.size(); do { size_t prev_in_left; size_t out_size; if ((int)char_cnt < (out_charset.size() + 12)) { ret.append(lfchars); // lfchars must be encoded in ASCII here ret.append(' '); char_cnt = line_len - 1; } ret.append("=?"); char_cnt -= 2; ret.append(out_charset); char_cnt -= out_charset.size(); ret.append('?'); char_cnt --; switch (scheme_id) { case PHP_ICONV_ENC_SCHEME_BASE64: { size_t ini_in_left; const char *ini_in_p; size_t out_reserved = 4; ret.append('B'); char_cnt--; ret.append('?'); char_cnt--; prev_in_left = ini_in_left = in_left; ini_in_p = in_p; out_size = (char_cnt - 2) / 4 * 3; for (;;) { out_p = buf; if (out_size <= out_reserved) { err = PHP_ICONV_ERR_TOO_BIG; goto out; } out_left = out_size - out_reserved; if (iconv(cd, (ICONV_CONST char **)&in_p, &in_left, (char **)&out_p, &out_left) == (size_t)-1) { #if ICONV_SUPPORTS_ERRNO switch (errno) { case EINVAL: err = PHP_ICONV_ERR_ILLEGAL_CHAR; goto out; case EILSEQ: err = PHP_ICONV_ERR_ILLEGAL_SEQ; goto out; case E2BIG: if (prev_in_left == in_left) { err = PHP_ICONV_ERR_TOO_BIG; goto out; } break; default: err = PHP_ICONV_ERR_UNKNOWN; goto out; } #else if (prev_in_left == in_left) { err = PHP_ICONV_ERR_UNKNOWN; goto out; } #endif } out_left += out_reserved; if (iconv(cd, NULL, NULL, (char **)&out_p, &out_left) == (size_t)-1) { #if ICONV_SUPPORTS_ERRNO if (errno != E2BIG) { err = PHP_ICONV_ERR_UNKNOWN; goto out; } #else if (out_left != 0) { err = PHP_ICONV_ERR_UNKNOWN; goto out; } #endif } else { break; } if (iconv(cd, NULL, NULL, NULL, NULL) == (size_t)-1) { err = PHP_ICONV_ERR_UNKNOWN; goto out; } out_reserved += 4; in_left = ini_in_left; in_p = ini_in_p; } prev_in_left = in_left; int encoded_len = out_size - out_left; String encoded = string_base64_encode(buf, encoded_len); if ((int)char_cnt < encoded.size()) { /* something went wrong! */ err = PHP_ICONV_ERR_UNKNOWN; goto out; } ret.append(encoded); char_cnt -= encoded.size(); ret.append("?="); char_cnt -= 2; } break; /* case PHP_ICONV_ENC_SCHEME_BASE64: */ case PHP_ICONV_ENC_SCHEME_QPRINT: { size_t ini_in_left; const char *ini_in_p; const unsigned char *p; size_t nbytes_required; ret.append('Q'); char_cnt--; ret.append('?'); char_cnt--; prev_in_left = ini_in_left = in_left; ini_in_p = in_p; for (out_size = (char_cnt - 2) / 3; out_size > 0;) { size_t prev_out_left ATTRIBUTE_UNUSED; nbytes_required = 0; out_p = buf; out_left = out_size; if (iconv(cd, (ICONV_CONST char **)&in_p, &in_left, (char **)&out_p, &out_left) == (size_t)-1) { #if ICONV_SUPPORTS_ERRNO switch (errno) { case EINVAL: err = PHP_ICONV_ERR_ILLEGAL_CHAR; goto out; case EILSEQ: err = PHP_ICONV_ERR_ILLEGAL_SEQ; goto out; case E2BIG: if (prev_in_left == in_left) { err = PHP_ICONV_ERR_UNKNOWN; goto out; } break; default: err = PHP_ICONV_ERR_UNKNOWN; goto out; } #else if (prev_in_left == in_left) { err = PHP_ICONV_ERR_UNKNOWN; goto out; } #endif } prev_out_left = out_left; if (iconv(cd, NULL, NULL, (char **)&out_p, &out_left) == (size_t)-1) { #if ICONV_SUPPORTS_ERRNO if (errno != E2BIG) { err = PHP_ICONV_ERR_UNKNOWN; goto out; } #else if (out_left == prev_out_left) { err = PHP_ICONV_ERR_UNKNOWN; goto out; } #endif } for (p = (unsigned char *)buf; p < (unsigned char *)out_p; p++) { nbytes_required += qp_table[*p]; } if (nbytes_required <= char_cnt - 2) { break; } out_size -= ((nbytes_required - (char_cnt - 2)) + 1) / (3 - 1); in_left = ini_in_left; in_p = ini_in_p; } for (p = (unsigned char *)buf; p < (unsigned char *)out_p; p++) { if (qp_table[*p] == 1) { ret.append(*(char*)p); char_cnt--; } else { static char qp_digits[] = "0123456789ABCDEF"; ret.append('='); ret.append(qp_digits[(*p >> 4) & 0x0f]); ret.append(qp_digits[(*p & 0x0f)]); char_cnt -= 3; } } prev_in_left = in_left; ret.append("?="); char_cnt -= 2; if (iconv(cd, NULL, NULL, NULL, NULL) == (size_t)-1) { err = PHP_ICONV_ERR_UNKNOWN; goto out; } } break; /* case PHP_ICONV_ENC_SCHEME_QPRINT: */ } } while (in_left > 0); out: if (cd != (iconv_t)(-1)) { iconv_close(cd); } if (cd_pl != (iconv_t)(-1)) { iconv_close(cd_pl); } if (buf != NULL) { req::free(buf); } if (err != PHP_ICONV_ERR_SUCCESS) { return false; } return ret.detach(); } static Variant HHVM_FUNCTION(iconv_mime_decode, const String& encoded_string, int64_t mode /* = 0 */, const Variant& charset /* = null_string */) { Variant encoded = check_charset(charset); if (same(encoded, false)) return false; String enc = encoded.toString(); StringBuffer retval; php_iconv_err_t err = _php_iconv_mime_decode(retval, encoded_string.data(), encoded_string.size(), enc.data(), NULL, mode); _php_iconv_show_error(__FUNCTION__+2, err, enc.data(), "???"); if (err == PHP_ICONV_ERR_SUCCESS) { return retval.detach(); } return false; } static Variant HHVM_FUNCTION(iconv_mime_decode_headers, const String& encoded_headers, int64_t mode /* = 0 */, const Variant& charset /* = null_string */) { Variant encoded = check_charset(charset); if (same(encoded, false)) return false; String enc = encoded.toString(); Array ret = Array::CreateDict(); php_iconv_err_t err = PHP_ICONV_ERR_SUCCESS; const char *encoded_str = encoded_headers.data(); int encoded_str_len = encoded_headers.size(); while (encoded_str_len > 0) { StringBuffer decoded_header; const char *header_name = NULL; size_t header_name_len = 0; const char *header_value = NULL; size_t header_value_len = 0; const char *p, *limit; const char *next_pos; err = _php_iconv_mime_decode(decoded_header, encoded_str, encoded_str_len, enc.data(), &next_pos, mode); if (err != PHP_ICONV_ERR_SUCCESS || decoded_header.data() == NULL) { break; } limit = decoded_header.data() + decoded_header.size(); for (p = decoded_header.data(); p < limit; p++) { if (*p == ':') { *((char*)p) = '\0'; header_name = decoded_header.data(); header_name_len = p - decoded_header.data(); while (++p < limit) { if (*p != ' ' && *p != '\t') { break; } } header_value = p; header_value_len = limit - p; break; } } if (header_name != NULL) { String header(header_name, header_name_len, CopyString); String value(header_value, header_value_len, CopyString); if (ret.exists(header)) { Variant elem = ret[header]; if (!elem.isArray()) { ret.set(header, make_vec_array(elem, value)); } else { elem.asArrRef().append(value); ret.set(header, elem); } } else { ret.set(header, value); } } encoded_str_len -= next_pos - encoded_str; encoded_str = next_pos; } if (err != PHP_ICONV_ERR_SUCCESS) { _php_iconv_show_error(__FUNCTION__+2, err, enc.data(), "???"); return false; } if (ret.empty()) { return init_null(); } return ret; } const StaticString s_input_encoding("input_encoding"), s_output_encoding("output_encoding"), s_internal_encoding("internal_encoding"), s_all("all"); static Variant HHVM_FUNCTION(iconv_get_encoding, const String& type /* = "all" */) { if (type == s_all) { return make_dict_array( s_input_encoding, ICONVG(input_encoding), s_output_encoding, ICONVG(output_encoding), s_internal_encoding, ICONVG(internal_encoding) ); } if (type == s_input_encoding) return ICONVG(input_encoding); if (type == s_output_encoding) return ICONVG(output_encoding); if (type == s_internal_encoding) return ICONVG(internal_encoding); return false; } static bool HHVM_FUNCTION(iconv_set_encoding, const String& type, const String& charset) { if (!validate_charset(charset)) return false; if (type == s_input_encoding) { ICONVG(input_encoding) = charset; } else if (type == s_output_encoding) { ICONVG(output_encoding) = charset; } else if (type == s_internal_encoding) { ICONVG(internal_encoding) = charset; } else { return false; } return true; } static Variant HHVM_FUNCTION(iconv, const String& in_charset, const String& out_charset, const String& str) { if (!validate_charset(in_charset)) return false; if (!validate_charset(out_charset)) return false; char* out_buffer = nullptr; size_t out_len; php_iconv_err_t err = php_iconv_string(str.data(), str.size(), &out_buffer, &out_len, out_charset.data(), in_charset.data()); SCOPE_EXIT { req::free(out_buffer); }; _php_iconv_show_error(__FUNCTION__+2, err, out_charset.data(), in_charset.data()); if (err == PHP_ICONV_ERR_SUCCESS && out_buffer != nullptr) { return String(out_buffer, out_len, CopyString); } return false; } static Variant HHVM_FUNCTION(iconv_strlen, const String& str, const Variant& charset /* = null_string */) { Variant encoded = check_charset(charset); if (same(encoded, false)) return false; String enc = encoded.toString(); unsigned int retval; php_iconv_err_t err = _php_iconv_strlen(&retval, str.data(), str.size(), enc.data()); _php_iconv_show_error(__FUNCTION__+2, err, GENERIC_SUPERSET_NAME, enc.data()); if (err == PHP_ICONV_ERR_SUCCESS) { return (int64_t)retval; } return false; } static Variant HHVM_FUNCTION(iconv_strpos, const String& haystack, const String& needle, int64_t offset /* = 0 */, const Variant& charset /* = null_string */) { if (offset < 0) { raise_warning("Offset not contained in string."); return false; } if (needle.size() < 1) { return false; } Variant encoded = check_charset(charset); if (same(encoded, false)) return false; String enc = encoded.toString(); unsigned int retval; php_iconv_err_t err = _php_iconv_strpos(&retval, haystack.data(), haystack.size(), needle.data(), needle.size(), offset, enc.data()); _php_iconv_show_error(__FUNCTION__+2, err, GENERIC_SUPERSET_NAME, enc.data()); if (err == PHP_ICONV_ERR_SUCCESS && retval != (unsigned int)-1) { return (long)retval; } return false; } static Variant HHVM_FUNCTION(iconv_strrpos, const String& haystack, const String& needle, const Variant& charset /* = null_string */) { if (needle.size() < 1) { return false; } Variant encoded = check_charset(charset); if (same(encoded, false)) return false; String enc = encoded.toString(); unsigned int retval; php_iconv_err_t err = _php_iconv_strpos(&retval, haystack.data(), haystack.size(), needle.data(), needle.size(), -1, enc.data()); _php_iconv_show_error(__FUNCTION__+2, err, GENERIC_SUPERSET_NAME, enc.data()); if (err == PHP_ICONV_ERR_SUCCESS && retval != (unsigned int)-1) { return (long)retval; } return false; } static Variant HHVM_FUNCTION(iconv_substr, const String& str, int64_t offset, int64_t length /* = INT_MAX */, const Variant& charset /* = null_string */) { Variant encoded = check_charset(charset); if (same(encoded, false)) return false; String enc = encoded.toString(); length = length <= INT32_MAX ? length : INT32_MAX; StringBuffer retval; php_iconv_err_t err = _php_iconv_substr(retval, str.data(), str.size(), offset, length, enc.data()); _php_iconv_show_error(__FUNCTION__+2, err, GENERIC_SUPERSET_NAME, enc.data()); if (err == PHP_ICONV_ERR_SUCCESS && !str.empty() && retval.data()) { return retval.detach(); } return false; } static String HHVM_FUNCTION(ob_iconv_handler, const String& contents, int64_t /*status*/) { String mimetype = g_context->getMimeType(); if (!mimetype.empty()) { char* out_buffer = nullptr; size_t out_len; php_iconv_err_t err = php_iconv_string(contents.data(), contents.size(), &out_buffer, &out_len, ICONVG(output_encoding).c_str(), ICONVG(internal_encoding).c_str()); SCOPE_EXIT { req::free(out_buffer); }; _php_iconv_show_error(__FUNCTION__+2, err, ICONVG(output_encoding).c_str(), ICONVG(internal_encoding).c_str()); if (out_buffer != NULL) { g_context->setContentType(mimetype, ICONVG(output_encoding)); return String(out_buffer, out_len, CopyString); } } return contents; } /////////////////////////////////////////////////////////////////////////////// #ifdef _LIBICONV_VERSION const char* iconv_impl() { return "libiconv"; }; std::string iconv_version() { return folly::sformat("{}.{}", _LIBICONV_VERSION >> 8, _LIBICONV_VERSION & 255); } #else const char* iconv_impl() { return "glibc"; }; #ifdef __GLIBC__ std::string iconv_version() { return folly::sformat("{}.{}", __GLIBC__, __GLIBC_MINOR__); } #else const char* iconv_version() { return "2.5"; } #endif #endif struct iconvExtension final : Extension { iconvExtension() : Extension("iconv") {} void moduleInit() override { HHVM_RC_STR(ICONV_IMPL, iconv_impl()); HHVM_RC_INT(ICONV_MIME_DECODE_CONTINUE_ON_ERROR, 2); HHVM_RC_INT(ICONV_MIME_DECODE_STRICT, 1); HHVM_RC_STR(ICONV_VERSION, iconv_version()); HHVM_FE(iconv_get_encoding); HHVM_FE(iconv_mime_decode_headers); HHVM_FE(iconv_mime_decode); HHVM_FE(iconv_mime_encode); HHVM_FE(iconv_set_encoding); HHVM_FE(iconv_strlen); HHVM_FE(iconv_strpos); HHVM_FE(iconv_strrpos); HHVM_FE(iconv_substr); HHVM_FE(iconv); HHVM_FE(ob_iconv_handler); loadSystemlib(); } } s_iconv_extension; ///////////////////////////////////////////////////////////////////////////// }