in GomokuServer/GomokuServer/json11.cpp [480:567]
string parse_string() {
string out;
long last_escaped_codepoint = -1;
while (true) {
if (i == str.size())
return fail("unexpected end of input in string", "");
char ch = str[i++];
if (ch == '"') {
encode_utf8(last_escaped_codepoint, out);
return out;
}
if (in_range(ch, 0, 0x1f))
return fail("unescaped " + esc(ch) + " in string", "");
// The usual case: non-escaped characters
if (ch != '\\') {
encode_utf8(last_escaped_codepoint, out);
last_escaped_codepoint = -1;
out += ch;
continue;
}
// Handle escapes
if (i == str.size())
return fail("unexpected end of input in string", "");
ch = str[i++];
if (ch == 'u') {
// Extract 4-byte escape sequence
string esc = str.substr(i, 4);
// Explicitly check length of the substring. The following loop
// relies on std::string returning the terminating NUL when
// accessing str[length]. Checking here reduces brittleness.
if (esc.length() < 4) {
return fail("bad \\u escape: " + esc, "");
}
for (size_t j = 0; j < 4; j++) {
if (!in_range(esc[j], 'a', 'f') && !in_range(esc[j], 'A', 'F')
&& !in_range(esc[j], '0', '9'))
return fail("bad \\u escape: " + esc, "");
}
long codepoint = strtol(esc.data(), nullptr, 16);
// JSON specifies that characters outside the BMP shall be encoded as a pair
// of 4-hex-digit \u escapes encoding their surrogate pair components. Check
// whether we're in the middle of such a beast: the previous codepoint was an
// escaped lead (high) surrogate, and this is a trail (low) surrogate.
if (in_range(last_escaped_codepoint, 0xD800, 0xDBFF)
&& in_range(codepoint, 0xDC00, 0xDFFF)) {
// Reassemble the two surrogate pairs into one astral-plane character, per
// the UTF-16 algorithm.
encode_utf8((((last_escaped_codepoint - 0xD800) << 10)
| (codepoint - 0xDC00)) + 0x10000, out);
last_escaped_codepoint = -1;
} else {
encode_utf8(last_escaped_codepoint, out);
last_escaped_codepoint = codepoint;
}
i += 4;
continue;
}
encode_utf8(last_escaped_codepoint, out);
last_escaped_codepoint = -1;
if (ch == 'b') {
out += '\b';
} else if (ch == 'f') {
out += '\f';
} else if (ch == 'n') {
out += '\n';
} else if (ch == 'r') {
out += '\r';
} else if (ch == 't') {
out += '\t';
} else if (ch == '"' || ch == '\\' || ch == '/') {
out += ch;
} else {
return fail("invalid escape character " + esc(ch), "");
}
}
}