static int verify_utf8_sequence()

in parson.c [243:286]


/*
 * Validates the UTF-8 encoded sequence that begins at s[0].
 *
 * s   - bytes to examine; s[0] is treated as the lead byte.
 * len - receives the sequence length that num_bytes_in_utf8_sequence()
 *       reports for s[0]. It is written before any validation happens, so
 *       it is set even when the function returns 0.
 *
 * Returns 1 when the length is in 1..4, every trailing byte satisfies
 * IS_CONT, and the decoded code point is neither an overlong form, nor above
 * 0x10FFFF, nor a surrogate half (0xD800..0xDFFF). Returns 0 otherwise.
 */
static int verify_utf8_sequence(_Nt_array_ptr<const unsigned char> s, _Ptr<int> len) {
    unsigned int code_point = 0;
    unsigned int smallest_allowed = 0; /* lowest code point that needs this many bytes */
    int nbytes = 0;
    int i = 0;

    *len = num_bytes_in_utf8_sequence(s[0]);
    nbytes = *len;
    if (nbytes < 1 || nbytes > 4) {
        return 0;
    }

    // TODO: Requires bounds widening, so left unchecked.
    _Unchecked {
        const unsigned char *bytes = (const unsigned char *)s;

        /* Extract the payload bits carried by the lead byte. */
        switch (nbytes) {
            case 1:
                code_point = bytes[0];
                smallest_allowed = 0;
                break;
            case 2:
                code_point = bytes[0] & 0x1F;
                smallest_allowed = 0x80;
                break;
            case 3:
                code_point = bytes[0] & 0xF;
                smallest_allowed = 0x800;
                break;
            default: /* nbytes == 4 */
                code_point = bytes[0] & 0x7;
                smallest_allowed = 0x10000;
                break;
        }

        /*
         * Append six bits per trailing byte. Bail out at the first byte that
         * is not a continuation so that nothing beyond it is ever read.
         */
        for (i = 1; i < nbytes; i++) {
            if (IS_CONT(bytes[i])) {
                code_point = (code_point << 6) | (bytes[i] & 0x3F);
            } else {
                return 0;
            }
        }
    }

    return code_point >= smallest_allowed                    /* not an overlong encoding */
        && code_point <= 0x10FFFF                            /* within the Unicode range */
        && (code_point < 0xD800 || code_point > 0xDFFF);     /* not a surrogate half */
}