in src/UndockedRegFreeWinRT/Catch/catch.hpp [14515:14607]
void XmlEncode::encodeTo( std::ostream& os ) const {
// Apostrophe escaping not necessary if we always use " to write attributes
// (see: http://www.w3.org/TR/xml/#syntax)
for( std::size_t idx = 0; idx < m_str.size(); ++ idx ) {
uchar c = m_str[idx];
switch (c) {
case '<': os << "<"; break;
case '&': os << "&"; break;
case '>':
// See: http://www.w3.org/TR/xml/#syntax
if (idx > 2 && m_str[idx - 1] == ']' && m_str[idx - 2] == ']')
os << ">";
else
os << c;
break;
case '\"':
if (m_forWhat == ForAttributes)
os << """;
else
os << c;
break;
default:
// Check for control characters and invalid utf-8
// Escape control characters in standard ascii
// see http://stackoverflow.com/questions/404107/why-are-control-characters-illegal-in-xml-1-0
if (c < 0x09 || (c > 0x0D && c < 0x20) || c == 0x7F) {
hexEscapeChar(os, c);
break;
}
// Plain ASCII: Write it to stream
if (c < 0x7F) {
os << c;
break;
}
// UTF-8 territory
// Check if the encoding is valid and if it is not, hex escape bytes.
// Important: We do not check the exact decoded values for validity, only the encoding format
// First check that this bytes is a valid lead byte:
// This means that it is not encoded as 1111 1XXX
// Or as 10XX XXXX
if (c < 0xC0 ||
c >= 0xF8) {
hexEscapeChar(os, c);
break;
}
auto encBytes = trailingBytes(c);
// Are there enough bytes left to avoid accessing out-of-bounds memory?
if (idx + encBytes - 1 >= m_str.size()) {
hexEscapeChar(os, c);
break;
}
// The header is valid, check data
// The next encBytes bytes must together be a valid utf-8
// This means: bitpattern 10XX XXXX and the extracted value is sane (ish)
bool valid = true;
uint32_t value = headerValue(c);
for (std::size_t n = 1; n < encBytes; ++n) {
uchar nc = m_str[idx + n];
valid &= ((nc & 0xC0) == 0x80);
value = (value << 6) | (nc & 0x3F);
}
if (
// Wrong bit pattern of following bytes
(!valid) ||
// Overlong encodings
(value < 0x80) ||
(0x80 <= value && value < 0x800 && encBytes > 2) ||
(0x800 < value && value < 0x10000 && encBytes > 3) ||
// Encoded value out of range
(value >= 0x110000)
) {
hexEscapeChar(os, c);
break;
}
// If we got here, this is in fact a valid(ish) utf-8 sequence
for (std::size_t n = 0; n < encBytes; ++n) {
os << m_str[idx + n];
}
idx += encBytes - 1;
break;
}
}
}