in impl/build/include/headers/json/json.hpp [16323:16573]
void dump_escaped(const string_t& s, const bool ensure_ascii)
{
std::uint32_t codepoint{};
std::uint8_t state = UTF8_ACCEPT;
std::size_t bytes = 0; // number of bytes written to string_buffer
// number of bytes written at the point of the last valid byte
std::size_t bytes_after_last_accept = 0;
std::size_t undumped_chars = 0;
for (std::size_t i = 0; i < s.size(); ++i)
{
const auto byte = static_cast<uint8_t>(s[i]);
switch (decode(state, codepoint, byte))
{
case UTF8_ACCEPT: // decode found a new code point
{
switch (codepoint)
{
case 0x08: // backspace
{
string_buffer[bytes++] = '\\';
string_buffer[bytes++] = 'b';
break;
}
case 0x09: // horizontal tab
{
string_buffer[bytes++] = '\\';
string_buffer[bytes++] = 't';
break;
}
case 0x0A: // newline
{
string_buffer[bytes++] = '\\';
string_buffer[bytes++] = 'n';
break;
}
case 0x0C: // formfeed
{
string_buffer[bytes++] = '\\';
string_buffer[bytes++] = 'f';
break;
}
case 0x0D: // carriage return
{
string_buffer[bytes++] = '\\';
string_buffer[bytes++] = 'r';
break;
}
case 0x22: // quotation mark
{
string_buffer[bytes++] = '\\';
string_buffer[bytes++] = '\"';
break;
}
case 0x5C: // reverse solidus
{
string_buffer[bytes++] = '\\';
string_buffer[bytes++] = '\\';
break;
}
default:
{
// escape control characters (0x00..0x1F) or, if
// ensure_ascii parameter is used, non-ASCII characters
if ((codepoint <= 0x1F) || (ensure_ascii && (codepoint >= 0x7F)))
{
if (codepoint <= 0xFFFF)
{
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg,hicpp-vararg)
(std::snprintf)(string_buffer.data() + bytes, 7, "\\u%04x",
static_cast<std::uint16_t>(codepoint));
bytes += 6;
}
else
{
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg,hicpp-vararg)
(std::snprintf)(string_buffer.data() + bytes, 13, "\\u%04x\\u%04x",
static_cast<std::uint16_t>(0xD7C0u + (codepoint >> 10u)),
static_cast<std::uint16_t>(0xDC00u + (codepoint & 0x3FFu)));
bytes += 12;
}
}
else
{
// copy byte to buffer (all previous bytes
// been copied have in default case above)
string_buffer[bytes++] = s[i];
}
break;
}
}
// write buffer and reset index; there must be 13 bytes
// left, as this is the maximal number of bytes to be
// written ("\uxxxx\uxxxx\0") for one code point
if (string_buffer.size() - bytes < 13)
{
o->write_characters(string_buffer.data(), bytes);
bytes = 0;
}
// remember the byte position of this accept
bytes_after_last_accept = bytes;
undumped_chars = 0;
break;
}
case UTF8_REJECT: // decode found invalid UTF-8 byte
{
switch (error_handler)
{
case error_handler_t::strict:
{
std::string sn(3, '\0');
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg,hicpp-vararg)
(std::snprintf)(&sn[0], sn.size(), "%.2X", byte);
JSON_THROW(type_error::create(316, "invalid UTF-8 byte at index " + std::to_string(i) + ": 0x" + sn, BasicJsonType()));
}
case error_handler_t::ignore:
case error_handler_t::replace:
{
// in case we saw this character the first time, we
// would like to read it again, because the byte
// may be OK for itself, but just not OK for the
// previous sequence
if (undumped_chars > 0)
{
--i;
}
// reset length buffer to the last accepted index;
// thus removing/ignoring the invalid characters
bytes = bytes_after_last_accept;
if (error_handler == error_handler_t::replace)
{
// add a replacement character
if (ensure_ascii)
{
string_buffer[bytes++] = '\\';
string_buffer[bytes++] = 'u';
string_buffer[bytes++] = 'f';
string_buffer[bytes++] = 'f';
string_buffer[bytes++] = 'f';
string_buffer[bytes++] = 'd';
}
else
{
string_buffer[bytes++] = detail::binary_writer<BasicJsonType, char>::to_char_type('\xEF');
string_buffer[bytes++] = detail::binary_writer<BasicJsonType, char>::to_char_type('\xBF');
string_buffer[bytes++] = detail::binary_writer<BasicJsonType, char>::to_char_type('\xBD');
}
// write buffer and reset index; there must be 13 bytes
// left, as this is the maximal number of bytes to be
// written ("\uxxxx\uxxxx\0") for one code point
if (string_buffer.size() - bytes < 13)
{
o->write_characters(string_buffer.data(), bytes);
bytes = 0;
}
bytes_after_last_accept = bytes;
}
undumped_chars = 0;
// continue processing the string
state = UTF8_ACCEPT;
break;
}
default: // LCOV_EXCL_LINE
JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE
}
break;
}
default: // decode found yet incomplete multi-byte code point
{
if (!ensure_ascii)
{
// code point will not be escaped - copy byte to buffer
string_buffer[bytes++] = s[i];
}
++undumped_chars;
break;
}
}
}
// we finished processing the string
if (JSON_HEDLEY_LIKELY(state == UTF8_ACCEPT))
{
// write buffer
if (bytes > 0)
{
o->write_characters(string_buffer.data(), bytes);
}
}
else
{
// we finish reading, but do not accept: string was incomplete
switch (error_handler)
{
case error_handler_t::strict:
{
std::string sn(3, '\0');
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg,hicpp-vararg)
(std::snprintf)(&sn[0], sn.size(), "%.2X", static_cast<std::uint8_t>(s.back()));
JSON_THROW(type_error::create(316, "incomplete UTF-8 string; last byte: 0x" + sn, BasicJsonType()));
}
case error_handler_t::ignore:
{
// write all accepted bytes
o->write_characters(string_buffer.data(), bytes_after_last_accept);
break;
}
case error_handler_t::replace:
{
// write all accepted bytes
o->write_characters(string_buffer.data(), bytes_after_last_accept);
// add a replacement character
if (ensure_ascii)
{
o->write_characters("\\ufffd", 6);
}
else
{
o->write_characters("\xEF\xBF\xBD", 3);
}
break;
}
default: // LCOV_EXCL_LINE
JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE
}
}
}