in platform/cc/interop.cc [858:938]
// Converts a buffer of JNI-style "modified UTF-8" to standard UTF-8, in place.
//
// Two encodings differ between the formats and are rewritten:
//   * U+0000 stored as the two bytes C0 80 becomes a single 00 byte;
//   * a supplementary code point stored as a surrogate pair, i.e. two
//     three-byte sequences (ED Ax xx  ED Bx xx), becomes one four-byte
//     UTF-8 sequence.
// Every other sequence is copied through unchanged, including standard
// four-byte sequences that are already valid UTF-8.
//
// Malformed input never causes a read past `len` and never stalls the loop:
// stray continuation bytes, invalid lead bytes, unpaired surrogates and
// sequences truncated by the end of the buffer are copied through as-is.
//
// data: buffer to convert; rewritten in place.
// len:  number of valid bytes in `data`.
// Returns the number of bytes of converted output now at the start of
// `data`. This is always <= len, because each rewrite is shorter than its
// source, so the write cursor never overtakes the read cursor.
size_t utfToUtf8(unsigned char *data, size_t len) {
    size_t read_offset = 0;
    size_t write_offset = 0;

    while (read_offset < len) {
        const size_t remaining = len - read_offset;
        const unsigned char byte1 = data[read_offset];

        // Two-byte U+0000: 11000000 10000000 -> 00000000
        if (byte1 == 0xC0 && remaining >= 2 && data[read_offset + 1] == 0x80) {
            data[write_offset] = 0;
            read_offset += 2;
            write_offset += 1;
            continue;
        }

        // Six-byte surrogate pair:
        // 11101101 1010xxxx 10xxxxxx 11101101 1011xxxx 10xxxxxx
        // The whole pattern is verified before committing, so an unpaired
        // high surrogate falls through to the plain three-byte copy below.
        if (byte1 == 0xED && remaining >= 6) {
            // Load every source byte first: the output overlaps the input.
            const unsigned char byte2 = data[read_offset + 1];
            const unsigned char byte3 = data[read_offset + 2];
            const unsigned char byte4 = data[read_offset + 3];
            const unsigned char byte5 = data[read_offset + 4];
            const unsigned char byte6 = data[read_offset + 5];
            if ((byte2 & 0xF0) == 0xA0 && (byte3 & 0xC0) == 0x80 &&
                byte4 == 0xED &&
                (byte5 & 0xF0) == 0xB0 && (byte6 & 0xC0) == 0x80) {
                // 10 payload bits from each surrogate, offset by 0x10000.
                const uint32_t codepoint = (((uint32_t)(byte2 & 0x0F) << 16) |
                                            ((uint32_t)(byte3 & 0x3F) << 10) |
                                            ((uint32_t)(byte5 & 0x0F) << 6) |
                                            (uint32_t)(byte6 & 0x3F))
                                           + 0x10000;
                // Four-byte UTF-8: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                data[write_offset]     = (unsigned char)(0xF0 | ((codepoint >> 18) & 0x07));
                data[write_offset + 1] = (unsigned char)(0x80 | ((codepoint >> 12) & 0x3F));
                data[write_offset + 2] = (unsigned char)(0x80 | ((codepoint >> 6) & 0x3F));
                data[write_offset + 3] = (unsigned char)(0x80 | (codepoint & 0x3F));
                read_offset += 6;
                write_offset += 4;
                continue;
            }
        }

        // Everything else is copied verbatim; the lead byte gives the length.
        size_t seq_len;
        if ((byte1 & 0x80) == 0x00) {
            seq_len = 1;  // 0xxxxxxx: U+0001..007F
        } else if ((byte1 & 0xE0) == 0xC0) {
            seq_len = 2;  // 110xxxxx: U+0080..07FF
        } else if ((byte1 & 0xF0) == 0xE0) {
            seq_len = 3;  // 1110xxxx: U+0800..FFFF
        } else if ((byte1 & 0xF8) == 0xF0) {
            seq_len = 4;  // 11110xxx: already standard UTF-8
        } else {
            seq_len = 1;  // stray continuation or invalid lead: always advance
        }
        // A sequence cut off by the end of the buffer is copied as far as it goes.
        if (seq_len > remaining) {
            seq_len = remaining;
        }
        // Forward copy is safe because write_offset <= read_offset.
        for (size_t i = 0; i < seq_len; i++) {
            data[write_offset + i] = data[read_offset + i];
        }
        read_offset += seq_len;
        write_offset += seq_len;
    }
    return write_offset;
}