string JsonParser::decodeString()

in lang/c++/impl/json/JsonIO.cc [321:429]


/*
 * Decodes the backslash escape sequences contained in `s` and returns the
 * decoded text. Bytes that are not part of an escape are copied unchanged.
 *
 * Recognised escapes are the quote, backslash, slash, b, f, n, r and t
 * escapes, plus the Unicode escape (a 'u' or 'U' followed by exactly four
 * hexadecimal digits).
 *
 * s      - the text to decode.
 * binary - when true, every Unicode escape denotes a single raw byte and its
 *          value must not exceed 0xff. When false, it denotes a Unicode code
 *          point (a high surrogate must be followed by a low-surrogate
 *          escape; the pair is combined) which is appended as UTF-8.
 *
 * Throws Exception when the input ends in the middle of an escape, when a
 * Unicode escape contains a non-hexadecimal digit, when the escape character
 * is not recognised, when a binary escape exceeds 0xff, or when a surrogate
 * is not part of a valid high/low pair.
 */
string JsonParser::decodeString(const string &s, bool binary) {
    string result;
    auto it = s.cbegin();
    const auto end = s.cend();

    // Returns the byte under the cursor and advances; throws at end of input.
    const auto readNextByte = [&]() -> char {
        if (it == end) {
            throw Exception("Unexpected EOF");
        }
        return *it++;
    };

    // Reads exactly four hexadecimal digits and returns their value.
    const auto unicodeParse = [&]() {
        uint32_t n = 0;
        for (int i = 0; i < 4; i++) {
            const char c = readNextByte();
            n *= 16;
            // Explicit range tests instead of isdigit(): passing a negative
            // char (any byte >= 0x80 where char is signed) to isdigit() is
            // undefined behaviour, and its result depends on the locale.
            if (c >= '0' && c <= '9') {
                n += c - '0';
            } else if (c >= 'a' && c <= 'f') {
                n += c - 'a' + 10;
            } else if (c >= 'A' && c <= 'F') {
                n += c - 'A' + 10;
            } else {
                throw Exception("Invalid hex character: {}", c);
            }
        }
        return n;
    };

    // Appends the UTF-8 encoding of `cp`; the caller guarantees cp < 0x110000.
    const auto appendUtf8 = [&result](uint32_t cp) {
        if (cp < 0x80) {
            result.push_back(static_cast<char>(cp));
        } else if (cp < 0x800) {
            result.push_back(static_cast<char>((cp >> 6) | 0xc0));
            result.push_back(static_cast<char>((cp & 0x3f) | 0x80));
        } else if (cp < 0x10000) {
            result.push_back(static_cast<char>((cp >> 12) | 0xe0));
            result.push_back(static_cast<char>(((cp >> 6) & 0x3f) | 0x80));
            result.push_back(static_cast<char>((cp & 0x3f) | 0x80));
        } else {
            result.push_back(static_cast<char>((cp >> 18) | 0xf0));
            result.push_back(static_cast<char>(((cp >> 12) & 0x3f) | 0x80));
            result.push_back(static_cast<char>(((cp >> 6) & 0x3f) | 0x80));
            result.push_back(static_cast<char>((cp & 0x3f) | 0x80));
        }
    };

    while (it != end) {
        // Start of the current escape, kept for error messages.
        const auto startSeq = it;
        char ch = readNextByte();
        if (ch != '\\') {
            result.push_back(ch);
            continue;
        }

        ch = readNextByte();
        switch (ch) {
            case '"':
            case '\\':
            case '/':
                result.push_back(ch);
                continue;
            case 'b':
                result.push_back('\b');
                continue;
            case 'f':
                result.push_back('\f');
                continue;
            case 'n':
                result.push_back('\n');
                continue;
            case 'r':
                result.push_back('\r');
                continue;
            case 't':
                result.push_back('\t');
                continue;
            case 'u':
            case 'U': {
                uint32_t n = unicodeParse();
                if (binary) {
                    if (n > 0xff) {
                        // `it` already points one past the escape; advancing
                        // it further could step beyond the end of the input.
                        throw Exception("Invalid byte for binary: {}{}", ch, string(startSeq, it));
                    }
                    result.push_back(static_cast<char>(n));
                    continue;
                }
                if (n >= 0xd800 && n < 0xdc00) {
                    // High surrogate: a low-surrogate escape must follow.
                    ch = readNextByte();
                    if (ch != '\\') {
                        throw Exception("Invalid unicode sequence: {}", string(startSeq, it));
                    }
                    ch = readNextByte();
                    if (ch != 'u' && ch != 'U') {
                        throw Exception("Invalid unicode sequence: {}", string(startSeq, it));
                    }
                    const uint32_t m = unicodeParse();
                    if (m < 0xdc00 || m > 0xdfff) {
                        throw Exception("Invalid unicode sequence: {}", string(startSeq, it));
                    }
                    n = 0x10000 + (((n - 0xd800) << 10) | (m - 0xdc00));
                } else if (n >= 0xdc00 && n <= 0xdfff) {
                    // Low surrogate without a preceding high surrogate. The
                    // range is inclusive of 0xdfff, the last low surrogate.
                    throw Exception("Invalid unicode sequence: {}", string(startSeq, it));
                }
                if (n >= 0x110000) {
                    // Defensive: a valid surrogate pair never exceeds 0x10ffff.
                    throw Exception("Invalid unicode value: {}{}", n, string(startSeq, it));
                }
                appendUtf8(n);
                continue;
            }
            default:
                throw Exception("Unexpected JSON parse state");
        }
    }
    return result;
}