static php_iconv_err_t _php_iconv_mime_decode()

in hphp/runtime/ext/iconv/ext_iconv.cpp [839:1352]


/**
 * Decode a MIME header value that may contain encoded words of the form
 * "=?charset?B?text?=" / "=?charset?Q?text?=" (optionally
 * "=?charset*lang?...") and append the result to retval.
 *
 * Bytes outside encoded words are appended through a converter opened with
 * iconv_open_helper(enc, "ASCII"). The payload of an encoded word is
 * base64- or quoted-printable-decoded and appended through a converter
 * opened with iconv_open_helper(enc, <charset named in the word>).
 *
 * Scanning stops at the end of the input, or at the first line break that
 * is not followed by a space or tab (the end of a folded header line).
 *
 * @param retval     buffer that receives the decoded output
 * @param str        start of the input; at most str_nbytes bytes are read
 * @param str_nbytes number of bytes available at str
 * @param enc        charset name handed to iconv_open_helper as its first
 *                   argument (presumably the output charset -- confirm
 *                   against the helper)
 * @param next_pos   if non-NULL, set to NULL on entry and, unless the
 *                   function bails out through the error path, to the
 *                   position at which scanning stopped
 * @param mode       bit mask; PHP_ICONV_MIME_DECODE_STRICT and
 *                   PHP_ICONV_MIME_DECODE_CONTINUE_ON_ERROR are honoured
 * @return PHP_ICONV_ERR_SUCCESS, or the error that aborted decoding
 *
 * scan_stat values used by the state machine:
 *    0  expecting any character
 *    1  saw '=', expecting '?'
 *    2  reading the charset name
 *    3  expecting the encoding scheme letter (B/Q)
 *    4  expecting the '?' after the scheme letter
 *    5  reading the encoded text
 *    6  saw the closing '?', expecting '='
 *    7  saw '\r', expecting '\n'
 *    8  saw a line break, checking for a folded continuation
 *    9  encoded word complete, deciding what comes next
 *   10  skipping the language tag after '*'
 *   11  inside a run of whitespace
 *   12  inside a non-encoded word
 */
static php_iconv_err_t _php_iconv_mime_decode(StringBuffer &retval,
                                              const char *str,
                                              size_t str_nbytes,
                                              const char *enc,
                                              const char **next_pos,
                                              int mode) {
  php_iconv_err_t err = PHP_ICONV_ERR_SUCCESS;

  /* cd: converter for the current encoded word's charset;
     cd_pl: converter for everything outside encoded words */
  iconv_t cd = (iconv_t)(-1), cd_pl = (iconv_t)(-1);

  const char *p1;
  size_t str_left;
  unsigned int scan_stat = 0;
  const char *csname = NULL;
  size_t csname_len;
  const char *encoded_text = NULL;
  size_t encoded_text_len = 0;
  const char *encoded_word = NULL;
  const char *spaces = NULL;

  php_iconv_enc_scheme_t enc_scheme = PHP_ICONV_ENC_SCHEME_BASE64;

  if (next_pos != NULL) {
    *next_pos = NULL;
  }
  cd_pl = iconv_open_helper(enc, "ASCII");

  if (cd_pl == (iconv_t)(-1)) {
#if ICONV_SUPPORTS_ERRNO
    if (errno == EINVAL) {
      err = PHP_ICONV_ERR_WRONG_CHARSET;
    } else {
      err = PHP_ICONV_ERR_CONVERTER;
    }
#else
    err = PHP_ICONV_ERR_UNKNOWN;
#endif
    goto out;
  }

  /* Invariant: p1 points at the current byte and str_left counts the bytes
     remaining including *p1, so p1 + 1 is readable only if str_left > 1. */
  p1 = str;
  for (str_left = str_nbytes; str_left > 0; str_left--, p1++) {
    int eos = 0;

    switch (scan_stat) {
    case 0: /* expecting any character */
      switch (*p1) {
      case '\r': /* part of an EOL sequence? */
        scan_stat = 7;
        break;

      case '\n':
        scan_stat = 8;
        break;

      case '=': /* first letter of an encoded chunk */
        encoded_word = p1;
        scan_stat = 1;
        break;

      case ' ': case '\t': /* a chunk of whitespaces */
        spaces = p1;
        scan_stat = 11;
        break;

      default: /* first letter of a non-encoded word */
        _php_iconv_appendc(retval, *p1, cd_pl);
        encoded_word = NULL;
        if ((mode & PHP_ICONV_MIME_DECODE_STRICT)) {
          scan_stat = 12;
        }
        break;
      }
      break;

    case 1: /* expecting a delimiter */
      if (*p1 != '?') {
        /* not an encoded word after all: emit what was held back verbatim */
        err = _php_iconv_appendl(retval, encoded_word,
                                 (size_t)((p1 + 1) - encoded_word), cd_pl);
        if (err != PHP_ICONV_ERR_SUCCESS) {
          goto out;
        }
        encoded_word = NULL;
        if ((mode & PHP_ICONV_MIME_DECODE_STRICT)) {
          scan_stat = 12;
        } else {
          scan_stat = 0;
        }
        break;
      }
      csname = p1 + 1;
      scan_stat = 2;
      break;

    case 2: /* expecting a charset name */
      switch (*p1) {
      case '?': /* normal delimiter: encoding scheme follows */
        scan_stat = 3;
        break;

      case '*': /* new style delimiter: locale id follows */
        scan_stat = 10;
        break;
      }
      if (scan_stat != 2) {
        /* the charset name just ended; copy it out and open a converter */
        char tmpbuf[80];

        if (csname == NULL) {
          err = PHP_ICONV_ERR_MALFORMED;
          goto out;
        }

        csname_len = (size_t)(p1 - csname);

        /* the name plus its terminating NUL must fit into tmpbuf */
        if (csname_len > sizeof(tmpbuf) - 1) {
          if ((mode & PHP_ICONV_MIME_DECODE_CONTINUE_ON_ERROR)) {
            err = _php_iconv_appendl(retval, encoded_word,
                                     (size_t)((p1 + 1) - encoded_word), cd_pl);
            if (err != PHP_ICONV_ERR_SUCCESS) {
              goto out;
            }
            encoded_word = NULL;
            if ((mode & PHP_ICONV_MIME_DECODE_STRICT)) {
              scan_stat = 12;
            } else {
              scan_stat = 0;
            }
            break;
          } else {
            err = PHP_ICONV_ERR_MALFORMED;
            goto out;
          }
        }

        memcpy(tmpbuf, csname, csname_len);
        tmpbuf[csname_len] = '\0';

        /* drop the converter left over from a previous encoded word */
        if (cd != (iconv_t)(-1)) {
          iconv_close(cd);
        }

        cd = iconv_open_helper(enc, tmpbuf);

        if (cd == (iconv_t)(-1)) {
          if ((mode & PHP_ICONV_MIME_DECODE_CONTINUE_ON_ERROR)) {
            /* The charset cannot be used but the caller asked us to carry
               on: skip ahead over the next two '?' delimiters (or to the
               last byte of the input) and emit the encoded word
               undecoded. */
            auto qmarks = 2;
            while (qmarks > 0 && str_left > 1) {
              p1++;
              qmarks -= *p1 == '?' ? 1 : 0 ;
              str_left--;
            }

            /* Also swallow the terminating '=' if it is there. Only peek
               when another byte really remains: reading p1 + 1 with
               str_left == 1 would look past the input, and consuming it
               would drop str_left to 0 so that the loop's own decrement
               wraps the unsigned counter and the scan runs off the end of
               the buffer. */
            if (str_left > 1 && *(p1+1) == '=') {
              ++p1;
              --str_left;
            }

            err = _php_iconv_appendl(retval, encoded_word,
                                     (size_t)((p1 + 1) - encoded_word), cd_pl);
            if (err != PHP_ICONV_ERR_SUCCESS) {
              goto out;
            }
            scan_stat = 12;
            break;
          } else {
#if ICONV_SUPPORTS_ERRNO
            if (errno == EINVAL) {
              err = PHP_ICONV_ERR_WRONG_CHARSET;
            } else {
              err = PHP_ICONV_ERR_CONVERTER;
            }
#else
            err = PHP_ICONV_ERR_UNKNOWN;
#endif
            goto out;
          }
        }
      }
      break;

    case 3: /* expecting a encoding scheme specifier */
      switch (*p1) {
      case 'b':
      case 'B':
        enc_scheme = PHP_ICONV_ENC_SCHEME_BASE64;
        scan_stat = 4;
        break;

      case 'q':
      case 'Q':
        enc_scheme = PHP_ICONV_ENC_SCHEME_QPRINT;
        scan_stat = 4;
        break;

      default:
        if ((mode & PHP_ICONV_MIME_DECODE_CONTINUE_ON_ERROR)) {
          err = _php_iconv_appendl(retval, encoded_word,
                                   (size_t)((p1 + 1) - encoded_word), cd_pl);
          if (err != PHP_ICONV_ERR_SUCCESS) {
            goto out;
          }
          encoded_word = NULL;
          if ((mode & PHP_ICONV_MIME_DECODE_STRICT)) {
            scan_stat = 12;
          } else {
            scan_stat = 0;
          }
          break;
        } else {
          err = PHP_ICONV_ERR_MALFORMED;
          goto out;
        }
      }
      break;

    case 4: /* expecting a delimiter */
      if (*p1 != '?') {
        if ((mode & PHP_ICONV_MIME_DECODE_CONTINUE_ON_ERROR)) {
          /* pass the entire chunk through the converter */
          err = _php_iconv_appendl(retval, encoded_word,
                                   (size_t)((p1 + 1) - encoded_word), cd_pl);
          if (err != PHP_ICONV_ERR_SUCCESS) {
            goto out;
          }
          encoded_word = NULL;
          if ((mode & PHP_ICONV_MIME_DECODE_STRICT)) {
            scan_stat = 12;
          } else {
            scan_stat = 0;
          }
          break;
        } else {
          err = PHP_ICONV_ERR_MALFORMED;
          goto out;
        }
      }
      encoded_text = p1 + 1;
      scan_stat = 5;
      break;

    case 5: /* expecting an encoded portion */
      if (*p1 == '?') {
        encoded_text_len = (size_t)(p1 - encoded_text);
        scan_stat = 6;
      }
      break;

    case 7: /* expecting a "\n" character */
      if (*p1 == '\n') {
        scan_stat = 8;
      } else {
        /* bare CR */
        _php_iconv_appendc(retval, '\r', cd_pl);
        _php_iconv_appendc(retval, *p1, cd_pl);
        scan_stat = 0;
      }
      break;

    case 8: /* checking whether the following line is part of a
               folded header */
      if (*p1 != ' ' && *p1 != '\t') {
        /* not a continuation line: step back so that, after the loop's
           final p1++, p1 points at this byte again, and force the loop
           to terminate */
        --p1;
        str_left = 1; /* quit_loop */
        break;
      }
      if (encoded_word == NULL) {
        _php_iconv_appendc(retval, ' ', cd_pl);
      }
      spaces = NULL;
      scan_stat = 11;
      break;

    case 6: /* expecting a End-Of-Chunk character "=" */
      if (*p1 != '=') {
        if ((mode & PHP_ICONV_MIME_DECODE_CONTINUE_ON_ERROR)) {
          /* pass the entire chunk through the converter */
          err = _php_iconv_appendl(retval, encoded_word,
                                   (size_t)((p1 + 1) - encoded_word), cd_pl);
          if (err != PHP_ICONV_ERR_SUCCESS) {
            goto out;
          }
          encoded_word = NULL;
          if ((mode & PHP_ICONV_MIME_DECODE_STRICT)) {
            scan_stat = 12;
          } else {
            scan_stat = 0;
          }
          break;
        } else {
          err = PHP_ICONV_ERR_MALFORMED;
          goto out;
        }
      }
      scan_stat = 9;
      if (str_left == 1) {
        /* the closing '=' is the last input byte: decode right away */
        eos = 1;
      } else {
        break;
      }
      /* fall through into state 9 when the input ends here */

    case 9: /* choice point, seeing what to do next.*/
      switch (*p1) {
      default:
        /* Handle non-RFC-compliant formats
         *
         * RFC2047 requires the character that comes right
         * after an encoded word (chunk) to be a whitespace,
         * while there are lots of broken implementations that
         * generate such malformed headers that don't fulfill
         * that requirement.
         */
        if (!eos) {
          if ((mode & PHP_ICONV_MIME_DECODE_STRICT)) {
            /* pass the entire chunk through the converter */
            err = _php_iconv_appendl(retval, encoded_word,
                                     (size_t)((p1 + 1) - encoded_word), cd_pl);
            if (err != PHP_ICONV_ERR_SUCCESS) {
              goto out;
            }
            scan_stat = 12;
            break;
          }
        }
        /* break is omitted intentionally */

      case '\r': case '\n': case ' ': case '\t': {
        /* undo the transfer encoding of the payload first */
        String decoded;
        switch (enc_scheme) {
        case PHP_ICONV_ENC_SCHEME_BASE64:
          {
            int len = encoded_text_len;
            decoded = string_base64_decode(encoded_text, len, false);
          }
          break;
        case PHP_ICONV_ENC_SCHEME_QPRINT:
          {
            int len = encoded_text_len;
            decoded = string_quoted_printable_decode(encoded_text, len, true);
          }
          break;
        default:
          break;
        }

        if (decoded.isNull()) {
          if ((mode & PHP_ICONV_MIME_DECODE_CONTINUE_ON_ERROR)) {
            /* pass the entire chunk through the converter */
            err = _php_iconv_appendl(retval, encoded_word,
                                     (size_t)((p1 + 1) - encoded_word), cd_pl);
            if (err != PHP_ICONV_ERR_SUCCESS) {
              goto out;
            }
            encoded_word = NULL;
            if ((mode & PHP_ICONV_MIME_DECODE_STRICT)) {
              scan_stat = 12;
            } else {
              scan_stat = 0;
            }
            break;
          } else {
            err = PHP_ICONV_ERR_UNKNOWN;
            goto out;
          }
        }

        /* then convert the decoded bytes with the word's own converter */
        err = _php_iconv_appendl(retval, decoded.data(), decoded.size(), cd);
        if (err != PHP_ICONV_ERR_SUCCESS) {
          if ((mode & PHP_ICONV_MIME_DECODE_CONTINUE_ON_ERROR)) {
            /* pass the entire chunk through the converter */
            err = _php_iconv_appendl(retval, encoded_word,
                                     (size_t)(p1 - encoded_word), cd_pl);
            encoded_word = nullptr;
            if (err != PHP_ICONV_ERR_SUCCESS) {
              break;
            }
          } else {
            goto out;
          }
        }

        if (eos) { /* reached end-of-string. done. */
          scan_stat = 0;
          break;
        }

        /* classify the byte that follows the encoded word */
        switch (*p1) {
        case '\r': /* part of an EOL sequence? */
          scan_stat = 7;
          break;

        case '\n':
          scan_stat = 8;
          break;

        case '=': /* first letter of an encoded chunk */
          scan_stat = 1;
          break;

        case ' ': case '\t': /* medial whitespaces */
          spaces = p1;
          scan_stat = 11;
          break;

        default: /* first letter of a non-encoded word */
          _php_iconv_appendc(retval, *p1, cd_pl);
          scan_stat = 12;
          break;
        }
      } break;
      }
      break;

    case 10: /* expects a language specifier. dismiss it for now */
      if (*p1 == '?') {
        scan_stat = 3;
      }
      break;

    case 11: /* expecting a chunk of whitespaces */
      switch (*p1) {
      case '\r': /* part of an EOL sequence? */
        scan_stat = 7;
        break;

      case '\n':
        scan_stat = 8;
        break;

      case '=': /* first letter of an encoded chunk */
        /* the held-back whitespace is emitted only when no encoded word
           is being tracked (encoded_word == NULL) */
        if (spaces != NULL && encoded_word == NULL) {
          _php_iconv_appendl(retval, spaces, (size_t)(p1 - spaces), cd_pl);
          spaces = NULL;
        }
        encoded_word = p1;
        scan_stat = 1;
        break;

      case ' ': case '\t':
        break;

      default: /* first letter of a non-encoded word */
        if (spaces != NULL) {
          _php_iconv_appendl(retval, spaces, (size_t)(p1 - spaces), cd_pl);
          spaces = NULL;
        }
        _php_iconv_appendc(retval, *p1, cd_pl);
        encoded_word = NULL;
        if ((mode & PHP_ICONV_MIME_DECODE_STRICT)) {
          scan_stat = 12;
        } else {
          scan_stat = 0;
        }
        break;
      }
      break;

    case 12: /* expecting a non-encoded word */
      switch (*p1) {
      case '\r': /* part of an EOL sequence? */
        scan_stat = 7;
        break;

      case '\n':
        scan_stat = 8;
        break;

      case ' ': case '\t':
        spaces = p1;
        scan_stat = 11;
        break;

      case '=': /* first letter of an encoded chunk */
        /* in strict mode a '=' inside a word is just another character */
        if (!(mode & PHP_ICONV_MIME_DECODE_STRICT)) {
          encoded_word = p1;
          scan_stat = 1;
          break;
        }
        /* break is omitted intentionally */

      default:
        _php_iconv_appendc(retval, *p1, cd_pl);
        break;
      }
      break;
    }
  }
  switch (scan_stat) {
  case 0: case 8: case 11: case 12:
    break;
  default:
    /* the input ended in the middle of a token */
    if ((mode & PHP_ICONV_MIME_DECODE_CONTINUE_ON_ERROR)) {
      if (scan_stat == 1) {
        /* a lone trailing '=' was held back; emit it */
        _php_iconv_appendc(retval, '=', cd_pl);
      }
      err = PHP_ICONV_ERR_SUCCESS;
    } else {
      err = PHP_ICONV_ERR_MALFORMED;
      goto out;
    }
  }

  if (next_pos != NULL) {
    *next_pos = p1;
  }

 out:
  if (cd != (iconv_t)(-1)) {
    iconv_close(cd);
  }
  if (cd_pl != (iconv_t)(-1)) {
    iconv_close(cd_pl);
  }
  return err;
}