in androidLibs/fbjni/cxx/fbjni/detail/utf8.cpp [167:209]
// Converts `len` bytes of "modified UTF-8" starting at `modified` into a
// std::string, applying two rewrites and passing everything else through:
//
//   * A surrogate pair stored as two three-byte sequences
//       ED Ax xx  ED Bx xx
//     (high surrogate 1101 10xx xxxx xxxx, low surrogate 1101 11xx xxxx xxxx)
//     is collapsed into a single four-byte sequence.
//   * The two-byte sequence C0 80 becomes a single zero byte.
//
// Any other byte is copied unchanged, one at a time. Input is not validated:
// malformed sequences are passed through as-is (garbage in, garbage out).
std::string modifiedUTF8ToUTF8(const uint8_t* modified, size_t len) noexcept {
  // Every rewrite consumes at least as many bytes as it produces
  // (6 -> 4, 2 -> 1, 1 -> 1), so `len` bytes of output space always suffice.
  std::string result(len, 0);

  size_t readPos = 0;
  size_t writePos = 0;
  while (readPos < len) {
    const uint8_t* cur = modified + readPos;
    // readPos < len holds here, so this subtraction cannot wrap.
    const size_t remaining = len - readPos;

    // The length check comes first so the lookahead reads below never go
    // past the end of the input. Only the lead byte and the high nibble of
    // the following byte of each half are inspected; the trailing
    // continuation bytes are not checked.
    const bool isSurrogatePair =
        remaining >= 6 &&
        cur[0] == 0xed &&
        (cur[1] & 0xf0) == 0xa0 &&
        cur[3] == 0xed &&
        (cur[4] & 0xf0) == 0xb0;

    if (isSurrogatePair) {
      const char32_t high = decode3ByteUTF8(cur);
      const char32_t low = decode3ByteUTF8(cur + 3);
      // Combine the low ten bits of each half and add the 0x10000 offset.
      const char32_t codePoint =
          0x10000 + (((high & 0x3ff) << 10) | (low & 0x3ff));
      encode4ByteUTF8(codePoint, result, writePos);
      readPos += 6;
      writePos += 4;
    } else if (remaining >= 2 && cur[0] == 0xc0 && cur[1] == 0x80) {
      // Two-byte encoding of zero -> a single zero byte.
      result[writePos] = 0;
      readPos += 2;
      writePos += 1;
    } else {
      // Plain passthrough of one byte. It may belong to a one-, two-, or
      // three-byte encoding, or to an invalid one; either way it is copied.
      result[writePos] = static_cast<char>(cur[0]);
      ++readPos;
      ++writePos;
    }
  }

  // Drop the unused tail left over from the rewrites that shrank the data.
  result.resize(writePos);
  return result;
}