in datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/Utf8.java [552:576]
/**
 * Decodes a single four-byte UTF-8 sequence and appends the resulting code point to
 * {@code dst} as two UTF-16 chars: the high surrogate first, then the low surrogate.
 *
 * @param byte1 the leading byte of the sequence
 * @param byte2 the second byte of the sequence
 * @param byte3 the third byte of the sequence
 * @param byte4 the fourth byte of the sequence
 * @param dst the sink that receives the surrogate pair
 * @throws IOException if appending to {@code dst} fails
 * @throws Utf8CodingException if any of {@code byte2}, {@code byte3}, {@code byte4} is
 *     rejected by {@code isNotTrailingByte}, or if {@code byte1}/{@code byte2} fail the
 *     plane range check
 */
static void handleFourBytes(
    final byte byte1, final byte byte2, final byte byte3, final byte byte4,
    final Appendable dst)
    throws IOException, Utf8CodingException {
  // Plane check: require 1 <= plane <= 16. This is a branch-free equivalent of
  //   byte1 > (byte) 0xF4                               (not a valid 4-byte leading byte)
  //   || byte1 == (byte) 0xF0 && byte2 < (byte) 0x90    (overlong encoding)
  //   || byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F    (beyond U+10FFFF)
  // The low nibble of byte1 is moved into the top four bits of an int, the offset of
  // byte2 from 0x90 is added, and any non-zero value in the top two bits (after an
  // arithmetic shift) signals one of the three failures above. The result is only
  // meaningful together with the trailing-byte test on byte2 performed below.
  final boolean planeOutOfRange =
      (((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0;

  final boolean malformed = isNotTrailingByte(byte2)
      || planeOutOfRange
      || isNotTrailingByte(byte3)
      || isNotTrailingByte(byte4);
  if (malformed) {
    // Report all four offending bytes to the caller.
    final byte[] badSequence = { byte1, byte2, byte3, byte4 };
    throw Utf8CodingException.illegalUtf8DecodeByteSequence(badSequence);
  }

  // Assemble the code point: 3 payload bits from the leading byte, followed by the
  // payload of each trailing byte at 6-bit strides.
  final int leadBits = (byte1 & 0x07) << 18;
  final int secondBits = trailingByteValue(byte2) << 12;
  final int thirdBits = trailingByteValue(byte3) << 6;
  final int fourthBits = trailingByteValue(byte4);
  final int scalar = leadBits | secondBits | thirdBits | fourthBits;

  dst.append(DecodeUtil.highSurrogate(scalar));
  dst.append(DecodeUtil.lowSurrogate(scalar));
}