size_t utfToUtf8()

in platform/cc/interop.cc [858:938]


// Rewrites a buffer of "modified UTF-8" as standard UTF-8, in place.
//
// Two encodings differ between the formats and are translated here:
//   * U+0000 stored as the two bytes C0 80 becomes a single 00 byte;
//   * a supplementary character stored as a surrogate pair, each half encoded
//     as three bytes (ED Ax xx  ED Bx xx), becomes one four-byte sequence.
// Every other byte is copied through unchanged. That includes input that is
// not well-formed (an unpaired surrogate, a sequence cut off by the end of the
// buffer, a stray continuation byte): such bytes are passed along verbatim
// rather than being trusted to have a particular shape, so the function never
// reads or writes outside data[0..len) and always terminates.
//
// data: buffer holding the input; overwritten with the converted output.
// len:  number of input bytes in data.
// Returns the number of output bytes now stored at the start of data. The
// output is never longer than the input.
size_t utfToUtf8(unsigned char *data, size_t len) {
    size_t read_offset = 0;
    size_t write_offset = 0;
    // Invariant: write_offset <= read_offset, so writing output never clobbers
    // input that has not been read yet.
    while (read_offset < len) {
        const size_t remaining = len - read_offset;
        const unsigned char *in = data + read_offset;

        // two-byte U+0000
        // 11000000    10000000
        if (remaining >= 2 && in[0] == 0xC0 && in[1] == 0x80) {
            data[write_offset] = 0;
            read_offset += 2;
            write_offset += 1;
            continue;
        }

        // Six-byte modified UTF-8
        // 11101101    1010xxxx    10xxxxxx    11101101    1011xxxx    10xxxxxx
        // Convert only when the complete pattern is present. A high surrogate
        // that is not followed by a low surrogate falls through and is copied
        // as an ordinary three-byte sequence instead of consuming bytes that
        // belong to the next character (or lie beyond the buffer).
        if (remaining >= 6 &&
            in[0] == 0xED && (in[1] & 0xF0) == 0xA0 && (in[2] & 0xC0) == 0x80 &&
            in[3] == 0xED && (in[4] & 0xF0) == 0xB0 && (in[5] & 0xC0) == 0x80) {
            // All six input bytes are folded into codepoint before any output
            // byte is stored, because output may overlap this input.
            const uint32_t codepoint = (((uint32_t)(in[1] & 0x0F) << 16) |
                                        ((uint32_t)(in[2] & 0x3F) << 10) |
                                        ((uint32_t)(in[4] & 0x0F) << 6) |
                                         (uint32_t)(in[5] & 0x3F))
                                       + 0x10000;
            // Four-byte UTF-8
            // 11110xxx    10xxxxxx    10xxxxxx    10xxxxxx
            data[write_offset]     = (unsigned char)(0xF0 | ((codepoint >> 18) & 0x07));
            data[write_offset + 1] = (unsigned char)(0x80 | ((codepoint >> 12) & 0x3F));
            data[write_offset + 2] = (unsigned char)(0x80 | ((codepoint >> 6) & 0x3F));
            data[write_offset + 3] = (unsigned char)(0x80 | (codepoint & 0x3F));

            read_offset += 6;
            write_offset += 4;
            continue;
        }

        // Everything else is already valid as-is and is copied verbatim:
        //   single-byte U+0001..007F     0xxxxxxx
        //   two-byte    U+0080..07FF     110xxxxx 10xxxxxx
        //   three-byte  U+0800..FFFF     1110xxxx 10xxxxxx 10xxxxxx
        // Any other byte (stray continuation byte, four-byte lead, ...) is
        // moved one byte at a time, which guarantees the loop makes progress.
        size_t seq_len = 1;
        if ((in[0] & 0xE0) == 0xC0) {
            seq_len = 2;
        } else if ((in[0] & 0xF0) == 0xE0) {
            seq_len = 3;
        }
        // A sequence cut short by the end of the buffer is copied as far as
        // it goes; never read past len.
        if (seq_len > remaining) {
            seq_len = remaining;
        }
        for (size_t i = 0; i < seq_len; i++) {
            data[write_offset + i] = in[i];
        }
        read_offset += seq_len;
        write_offset += seq_len;
    }

    return write_offset;
}