in lightning-core/src/main/java/org/apache/directmemory/lightning/internal/util/UnicodeUtil.java [154:219]
/**
 * Encodes a string as UTF-8 and writes it to the given target.
 * <p>
 * The output starts with the string's length in UTF-16 code units (written via
 * {@code target.writeInt}), followed by the UTF-8 bytes. A high surrogate that is not
 * followed by a low surrogate, or a low surrogate that appears on its own, is written
 * as the three bytes {@code EF BF BD} (the UTF-8 form of U+FFFD).
 *
 * @param value the string to encode
 * @param target the sink receiving the length prefix and the encoded bytes
 * @return the number of UTF-8 bytes written, not counting the length prefix
 * @throws IOException propagated from the target's write methods
 */
public static int UTF16toUTF8( String value, Target target )
    throws IOException
{
    final int unitCount = value.length();
    // The prefix is the number of UTF-16 code units, not the number of encoded bytes
    target.writeInt( unitCount );

    int byteCount = 0;
    int pos = 0;
    while ( pos < unitCount )
    {
        final char unit = value.charAt( pos );
        pos++;

        // U+0000..U+007F: single byte
        if ( unit < 0x80 )
        {
            target.writeByte( (byte) unit );
            byteCount += 1;
            continue;
        }

        // U+0080..U+07FF: two bytes
        if ( unit < 0x800 )
        {
            target.writeByte( (byte) ( 0xC0 | ( unit >> 6 ) ) );
            target.writeByte( (byte) ( 0x80 | ( unit & 0x3F ) ) );
            byteCount += 2;
            continue;
        }

        // Remaining non-surrogate code units: three bytes
        if ( unit < 0xD800 || unit > 0xDFFF )
        {
            target.writeByte( (byte) ( 0xE0 | ( unit >> 12 ) ) );
            target.writeByte( (byte) ( 0x80 | ( ( unit >> 6 ) & 0x3F ) ) );
            target.writeByte( (byte) ( 0x80 | ( unit & 0x3F ) ) );
            byteCount += 3;
            continue;
        }

        // From here on the unit is a surrogate. Only a high surrogate immediately
        // followed by a low surrogate forms a pair and is written as four bytes.
        if ( Character.isHighSurrogate( unit ) && pos < unitCount
            && Character.isLowSurrogate( value.charAt( pos ) ) )
        {
            // NOTE(review): SURROGATE_OFFSET is defined outside this method; presumably it
            // turns the shifted pair into the supplementary code point - confirm.
            final int codePoint = ( unit << 10 ) + value.charAt( pos ) + SURROGATE_OFFSET;
            pos++;
            target.writeByte( (byte) ( 0xF0 | ( codePoint >> 18 ) ) );
            target.writeByte( (byte) ( 0x80 | ( ( codePoint >> 12 ) & 0x3F ) ) );
            target.writeByte( (byte) ( 0x80 | ( ( codePoint >> 6 ) & 0x3F ) ) );
            target.writeByte( (byte) ( 0x80 | ( codePoint & 0x3F ) ) );
            byteCount += 4;
            continue;
        }

        // Unpaired high surrogate or stray low surrogate: emit the substitution character
        target.writeByte( (byte) 0xEF );
        target.writeByte( (byte) 0xBF );
        target.writeByte( (byte) 0xBD );
        byteCount += 3;
    }
    return byteCount;
}