in parson.c [243:286]
/*
 * Checks that s starts with one well-formed UTF-8 sequence.
 *
 * *len is always set to the value num_bytes_in_utf8_sequence() reports for
 * the lead byte s[0], including when the sequence is rejected.
 *
 * Returns 1 when the sequence is accepted and 0 otherwise. A sequence is
 * rejected when the reported length is not 1..4, when a trailing byte fails
 * IS_CONT, when the encoding is overlong, when the code point is above
 * 0x10FFFF, or when it lies in the surrogate range 0xD800..0xDFFF.
 */
static int verify_utf8_sequence(_Nt_array_ptr<const unsigned char> s, _Ptr<int> len) {
    unsigned int cp = 0;     /* decoded code point */
    unsigned int min_cp = 0; /* smallest code point allowed for this length */
    int nbytes = num_bytes_in_utf8_sequence(s[0]);

    *len = nbytes;

    // TODO: Requires bounds widening, so left unchecked.
    _Unchecked {
        const unsigned char *bytes = (const unsigned char *)s;

        /*
         * Trailing bytes are tested left to right and the first failure
         * returns immediately, so no byte after a non-continuation byte
         * (e.g. the terminator) is ever read.
         */
        switch (nbytes) {
        case 1:
            cp = bytes[0];
            break;
        case 2:
            if (!IS_CONT(bytes[1])) {
                return 0;
            }
            cp = bytes[0] & 0x1F;
            cp = (cp << 6) | (bytes[1] & 0x3F);
            min_cp = 0x80;
            break;
        case 3:
            if (!IS_CONT(bytes[1]) || !IS_CONT(bytes[2])) {
                return 0;
            }
            cp = bytes[0] & 0xF;
            cp = (cp << 6) | (bytes[1] & 0x3F);
            cp = (cp << 6) | (bytes[2] & 0x3F);
            min_cp = 0x800;
            break;
        case 4:
            if (!IS_CONT(bytes[1]) || !IS_CONT(bytes[2]) || !IS_CONT(bytes[3])) {
                return 0;
            }
            cp = bytes[0] & 0x7;
            cp = (cp << 6) | (bytes[1] & 0x3F);
            cp = (cp << 6) | (bytes[2] & 0x3F);
            cp = (cp << 6) | (bytes[3] & 0x3F);
            min_cp = 0x10000;
            break;
        default:
            return 0;
        }
    }

    /* overlong encodings: the value would have fit in fewer bytes */
    if (cp < min_cp) {
        return 0;
    }
    /* invalid unicode (beyond the last code point) or a surrogate half */
    if (cp > 0x10FFFF || (cp >= 0xD800 && cp <= 0xDFFF)) {
        return 0;
    }
    return 1;
}