static apr_status_t apr_json_decode_string()

in json/apr_json_decode.c [82:350]


/*
 * Decode a JSON string literal from the scanner into *retval.
 *
 * The byte at self->p on entry is taken to be the opening '"' and is skipped
 * without being inspected. Decoding is done in two passes:
 *
 *   1. scan forward to the closing '"', computing an upper bound on the
 *      decoded length (a \uXXXX escape is budgeted 4 bytes, every other
 *      escape or literal byte 1 byte);
 *   2. allocate len + 1 bytes from self->pool and copy the string into it,
 *      resolving backslash escapes, turning \uXXXX escapes (including
 *      surrogate pairs) into UTF-8 through ucs4_to_utf8(), and checking the
 *      continuation bytes of literal multi-byte UTF-8 sequences.
 *
 * Returns APR_SUCCESS when a complete string was decoded, APR_EOF when the
 * input ends before the string (or one of its escapes / multi-byte sequences)
 * is complete, and APR_BADCH for an invalid escape, an invalid hex digit, an
 * unpaired surrogate or a malformed UTF-8 sequence.
 *
 * On success *retval receives the NUL-terminated buffer and its length, and
 * self->p is left just past the closing '"'. On failure *retval is not
 * written and self->p is left where scanning stopped.
 *
 * NOTE: bytes that do not start a recognised multi-byte sequence (including
 * control characters and stray continuation bytes) are copied through
 * unchanged, and overlong UTF-8 forms are not rejected.
 */
static apr_status_t apr_json_decode_string(apr_json_scanner_t * self, apr_json_string_t * retval)
{
    apr_status_t status = APR_SUCCESS;
    apr_json_string_t string;
    const char *p = self->p;
    const char *e;
    char *q;
    apr_ssize_t len;

    if (self->p >= self->e) {
        status = APR_EOF;
        goto out;
    }

    self->p++; /* eat the leading '"' */

    /*
     * Pass 1: find the closing '"' and compute an upper bound on the number
     * of bytes the decoded string needs.
     */
    len = 0;
    for (p = self->p, e = self->e; p < e;) {
        if (*p == '"') {
            break;
        }
        else if (*p == '\\') {
            p++;
            if (p >= e) {
                status = APR_EOF;
                goto out;
            }
            if (*p == 'u') {
                /* the 'u' must be followed by 4 more bytes inside the input */
                if (e - p <= 4) {
                    status = APR_EOF;
                    goto out;
                }
                p += 5;
                len += 4; /* an UTF-8 character spans at most 4 bytes */
            }
            else {
                len++;
                p++;
            }
        }
        else {
            len++;
            p++;
        }
    }

    if (p >= e) {
        /* ran off the end of the input without seeing the closing '"' */
        status = APR_EOF;
        goto out;
    }

    string.p = q = apr_pcalloc(self->pool, len + 1);
    e = p; /* from here on e marks the closing '"' */

/* A UTF-8 continuation byte must lie in 0x80..0xbf. */
#define VALIDATE_UTF8_SUCCEEDING_BYTE(p) \
    do { \
        if (*(const unsigned char *)(p) < 0x80 || *(const unsigned char *)(p) >= 0xc0) { \
            status = APR_BADCH; \
            goto out; \
        } \
    } while (0)

    /* Pass 2: copy and unescape everything between the two quotes. */
    for (p = self->p; p < e;) {
        switch (*(const unsigned char *)p) {
        case '\\':
            p++;
            switch (*p) {
            case 'u':
                /* THIS IS REQUIRED TO BE A 4 DIGIT HEX NUMBER */
                {
                    int i, d, cp = 0;
                    for (i = 0, p++; i < 4 && p < e; i++, p++) {
                        d = hex_to_int(*p);
                        if (d < 0) {
                            status = APR_BADCH;
                            goto out;
                        }
                        cp = (cp << 4) | d;
                    }
                    if (cp >= 0xd800 && cp < 0xdc00) {
                        /*
                         * High surrogate: it must be immediately followed by
                         * a second \uXXXX escape holding the low surrogate.
                         */
                        int sc = 0;
                        if (e - p < 6) {
                            status = APR_EOF;
                            goto out;
                        }
                        if (p[0] != '\\' || p[1] != 'u') {
                            status = APR_BADCH;
                            goto out;
                        }
                        for (i = 0, p += 2; i < 4 && p < e; i++, p++) {
                            d = hex_to_int(*p);
                            if (d < 0) {
                                status = APR_BADCH;
                                goto out;
                            }
                            sc = (sc << 4) | d;
                        }
                        if (sc < 0xdc00 || sc >= 0xe000) {
                            /* second half of the pair is not a low surrogate */
                            status = APR_BADCH;
                            goto out;
                        }
                        /* combine the pair into a code point in U+10000..U+10FFFF */
                        cp = 0x10000 + (((cp & 0x3ff) << 10) | (sc & 0x3ff));
                    }
                    else if (cp >= 0xdc00 && cp < 0xe000) {
                        /* low surrogate without a preceding high surrogate */
                        status = APR_BADCH;
                        goto out;
                    }
                    q += ucs4_to_utf8(q, cp);
                }
                break;
            case '\\':
                *q++ = '\\';
                p++;
                break;
            case '/':
                *q++ = '/';
                p++;
                break;
            case 'n':
                *q++ = '\n';
                p++;
                break;
            case 'r':
                *q++ = '\r';
                p++;
                break;
            case 't':
                *q++ = '\t';
                p++;
                break;
            case 'f':
                *q++ = '\f';
                p++;
                break;
            case 'b':
                *q++ = '\b';
                p++;
                break;
            case '"':
                *q++ = '"';
                p++;
                break;
            default:
                status = APR_BADCH;
                goto out;
            }
            break;

        /* lead byte of a 2 byte UTF-8 sequence */
        case 0xc0:
        case 0xc1:
        case 0xc2:
        case 0xc3:
        case 0xc4:
        case 0xc5:
        case 0xc6:
        case 0xc7:
        case 0xc8:
        case 0xc9:
        case 0xca:
        case 0xcb:
        case 0xcc:
        case 0xcd:
        case 0xce:
        case 0xcf:
        case 0xd0:
        case 0xd1:
        case 0xd2:
        case 0xd3:
        case 0xd4:
        case 0xd5:
        case 0xd6:
        case 0xd7:
        case 0xd8:
        case 0xd9:
        case 0xda:
        case 0xdb:
        case 0xdc:
        case 0xdd:
        case 0xde:
        case 0xdf:
            if (e - p <= 1) {
                status = APR_EOF;
                goto out;
            }
            *q++ = *p++;
            VALIDATE_UTF8_SUCCEEDING_BYTE(p);
            *q++ = *p++;
            break;

        /* lead byte of a 3 byte UTF-8 sequence */
        case 0xe0:
        case 0xe1:
        case 0xe2:
        case 0xe3:
        case 0xe4:
        case 0xe5:
        case 0xe6:
        case 0xe7:
        case 0xe8:
        case 0xe9:
        case 0xea:
        case 0xeb:
        case 0xec:
        case 0xed:
        case 0xee:
        case 0xef:
            if (e - p <= 2) {
                status = APR_EOF;
                goto out;
            }
            *q++ = *p++;
            VALIDATE_UTF8_SUCCEEDING_BYTE(p);
            *q++ = *p++;
            VALIDATE_UTF8_SUCCEEDING_BYTE(p);
            *q++ = *p++;
            break;

        /* lead byte of a 4 byte UTF-8 sequence */
        case 0xf0:
        case 0xf1:
        case 0xf2:
        case 0xf3:
        case 0xf4:
        case 0xf5:
        case 0xf6:
        case 0xf7:
            if (e - p <= 3) {
                status = APR_EOF;
                goto out;
            }
            /*
             * Nothing above U+10FFFF is valid: lead bytes 0xf5..0xf7 are
             * always out of range, and 0xf4 is only in range when its first
             * continuation byte is below 0x90.
             */
            if (((const unsigned char *)p)[0] >= 0xf5
                || (((const unsigned char *)p)[0] == 0xf4
                    && ((const unsigned char *)p)[1] >= 0x90)) {
                status = APR_BADCH;
                goto out;
            }
            *q++ = *p++;
            VALIDATE_UTF8_SUCCEEDING_BYTE(p);
            *q++ = *p++;
            VALIDATE_UTF8_SUCCEEDING_BYTE(p);
            *q++ = *p++;
            VALIDATE_UTF8_SUCCEEDING_BYTE(p);
            *q++ = *p++;
            break;

        /* 5 byte form: never valid, reported as EOF if truncated */
        case 0xf8:
        case 0xf9:
        case 0xfa:
        case 0xfb:
            if (e - p <= 4) {
                status = APR_EOF;
                goto out;
            }
            status = APR_BADCH;
            goto out;

        /* 6 byte form: never valid, reported as EOF if truncated */
        case 0xfc:
        case 0xfd:
            if (e - p <= 5) {
                status = APR_EOF;
                goto out;
            }
            status = APR_BADCH;
            goto out;

        default:
            *q++ = *p++;
            break;
        }
    }
#undef VALIDATE_UTF8_SUCCEEDING_BYTE
    p++; /* eat the trailing '"' */
    *q = 0;
    string.len = q - string.p;
    *retval = string;
out:
    self->p = p;
    return status;
}