in json/apr_json_decode.c [82:350]
/*
 * Decode the JSON string literal that starts at self->p.
 *
 * The scanner is expected to be positioned on the opening '"'; that byte is
 * skipped without being inspected.  The literal is processed in two passes:
 * the first locates the closing '"' and computes an upper bound for the
 * decoded size, the second resolves escape sequences and checks multi-byte
 * UTF-8 sequences while copying into a buffer obtained with apr_pcalloc()
 * from self->pool.
 *
 * On success *retval receives the NUL-terminated buffer and its length
 * (excluding the terminator) and self->p is left just past the closing '"'.
 * On failure *retval is not written and self->p is left at the position
 * where decoding stopped.
 *
 * Returns APR_SUCCESS, APR_EOF when the input ends before the literal is
 * complete (including a missing closing '"'), or APR_BADCH for an unknown
 * escape, a non-hex digit in a \u escape, an invalid surrogate combination,
 * or a malformed UTF-8 sequence.
 */
static apr_status_t apr_json_decode_string(apr_json_scanner_t * self, apr_json_string_t * retval)
{
    apr_status_t status = APR_SUCCESS;
    apr_json_string_t string;
    const char *p = self->p;
    const char *e;
    char *q;
    apr_ssize_t len;

    if (self->p >= self->e) {
        status = APR_EOF;
        goto out;
    }

    self->p++; /* eat the leading '"' */

    /*
     * Pass 1: find the closing quote and an upper bound for the decoded
     * length.  Escaped characters are skipped so that an escaped '"' does
     * not end the literal.
     */
    len = 0;
    for (p = self->p, e = self->e; p < e;) {
        if (*p == '"') {
            break;
        }
        if (*p != '\\') {
            len++;
            p++;
            continue;
        }

        p++; /* skip the backslash */
        if (p >= e) {
            status = APR_EOF;
            goto out;
        }
        if (*p == 'u') {
            /* 'u' must be followed by four more bytes inside the input */
            if (e - p <= 4) {
                status = APR_EOF;
                goto out;
            }
            p += 5;
            len += 4; /* an UTF-8 character spans at most 4 bytes */
        }
        else {
            len++;
            p++;
        }
    }

    if (p >= e) {
        /*
         * The input ended without a closing '"'.  Report it instead of
         * stepping past the end of the buffer and returning success.
         */
        status = APR_EOF;
        goto out;
    }

    string.p = q = apr_pcalloc(self->pool, len + 1);
    e = p; /* from here on e marks the closing '"' */

#define VALIDATE_UTF8_SUCCEEDING_BYTE(p) \
    do { \
        if (*(const unsigned char *)(p) < 0x80 || *(const unsigned char *)(p) >= 0xc0) { \
            status = APR_BADCH; \
            goto out; \
        } \
    } while (0)

    /* Pass 2: decode escapes and copy/validate raw bytes. */
    for (p = self->p; p < e;) {
        unsigned char c = *(const unsigned char *)p;

        if (c == '\\') {
            p++;
            switch (*p) {
            case 'u':
                /* THIS IS REQUIRED TO BE A 4 DIGIT HEX NUMBER */
                {
                    int i, d, cp = 0;

                    for (i = 0, p++; i < 4 && p < e; i++, p++) {
                        d = hex_to_int(*p);
                        if (d < 0) {
                            status = APR_BADCH;
                            goto out;
                        }
                        cp = (cp << 4) | d;
                    }

                    if (cp >= 0xd800 && cp < 0xdc00) {
                        /* high surrogate: a "\uXXXX" low surrogate must follow */
                        int sc = 0;

                        if (e - p < 6) {
                            status = APR_EOF;
                            goto out;
                        }
                        /* both the backslash and the 'u' are required */
                        if (p[0] != '\\' || p[1] != 'u') {
                            status = APR_BADCH;
                            goto out;
                        }
                        for (i = 0, p += 2; i < 4 && p < e; i++, p++) {
                            d = hex_to_int(*p);
                            if (d < 0) {
                                status = APR_BADCH;
                                goto out;
                            }
                            sc = (sc << 4) | d;
                        }
                        /* the second half must be a low surrogate */
                        if (sc < 0xdc00 || sc >= 0xe000) {
                            status = APR_BADCH;
                            goto out;
                        }
                        /*
                         * Surrogate pairs encode U+10000..U+10FFFF: ten bits
                         * from each half, offset by 0x10000.
                         */
                        cp = 0x10000 + (((cp & 0x3ff) << 10) | (sc & 0x3ff));
                    }
                    else if (cp >= 0xdc00 && cp < 0xe000) {
                        /* low surrogate without a preceding high surrogate */
                        status = APR_BADCH;
                        goto out;
                    }

                    q += ucs4_to_utf8(q, cp);
                }
                break;
            case '\\':
                *q++ = '\\';
                p++;
                break;
            case '/':
                *q++ = '/';
                p++;
                break;
            case 'n':
                *q++ = '\n';
                p++;
                break;
            case 'r':
                *q++ = '\r';
                p++;
                break;
            case 't':
                *q++ = '\t';
                p++;
                break;
            case 'f':
                *q++ = '\f';
                p++;
                break;
            case 'b':
                *q++ = '\b';
                p++;
                break;
            case '"':
                *q++ = '"';
                p++;
                break;
            default:
                status = APR_BADCH;
                goto out;
            }
        }
        else if (c >= 0xc0 && c <= 0xdf) {
            /* lead byte of a 2-byte sequence */
            if (e - p <= 1) {
                status = APR_EOF;
                goto out;
            }
            *q++ = *p++;
            VALIDATE_UTF8_SUCCEEDING_BYTE(p);
            *q++ = *p++;
        }
        else if (c >= 0xe0 && c <= 0xef) {
            /* lead byte of a 3-byte sequence */
            if (e - p <= 2) {
                status = APR_EOF;
                goto out;
            }
            *q++ = *p++;
            VALIDATE_UTF8_SUCCEEDING_BYTE(p);
            *q++ = *p++;
            VALIDATE_UTF8_SUCCEEDING_BYTE(p);
            *q++ = *p++;
        }
        else if (c >= 0xf0 && c <= 0xf7) {
            /* lead byte of a 4-byte sequence */
            if (e - p <= 3) {
                status = APR_EOF;
                goto out;
            }
            /*
             * Reject code points above U+10FFFF: any lead >= 0xf5, or lead
             * 0xf4 with a second byte >= 0x90.  The second-byte limit only
             * applies to 0xf4; leads 0xf0..0xf3 may be followed by any
             * continuation byte.
             */
            if (c >= 0xf5 || (c == 0xf4 && ((const unsigned char *)p)[1] >= 0x90)) {
                status = APR_BADCH;
                goto out;
            }
            *q++ = *p++;
            VALIDATE_UTF8_SUCCEEDING_BYTE(p);
            *q++ = *p++;
            VALIDATE_UTF8_SUCCEEDING_BYTE(p);
            *q++ = *p++;
            VALIDATE_UTF8_SUCCEEDING_BYTE(p);
            *q++ = *p++;
        }
        else if (c >= 0xf8 && c <= 0xfd) {
            /*
             * 5-byte (0xf8..0xfb) and 6-byte (0xfc..0xfd) forms are never
             * accepted; a truncated one is reported as APR_EOF, a complete
             * one as APR_BADCH.
             */
            int trailing = (c >= 0xfc) ? 5 : 4;

            status = (e - p <= trailing) ? APR_EOF : APR_BADCH;
            goto out;
        }
        else {
            /* every other byte is copied through unchanged */
            *q++ = *p++;
        }
    }

#undef VALIDATE_UTF8_SUCCEEDING_BYTE

    p++; /* eat the trailing '"' */
    *q = 0;
    string.len = q - string.p;
    *retval = string;

out:
    self->p = p;
    return status;
}