in datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/Utf8.java [93:131]
/**
 * Decodes a run of UTF-8 bytes, read through {@code unsafe}, into the backing array of an
 * array-backed {@link CharBuffer}.
 *
 * <p>Leading single-byte (ASCII) characters are copied by two small loops. If every byte turns
 * out to be single-byte, the buffer position is advanced here. Otherwise the remaining bytes are
 * handed to {@code getCharBufferNonAsciiCharsFromUtf8}.</p>
 *
 * @param offsetBytes offset of the first UTF-8 byte, added to {@code cumBaseOffset}
 * @param cbuf destination buffer; {@code cbuf.array()} is called, so it must be array-backed
 * @param utf8LengthBytes number of UTF-8 bytes to decode
 * @param cumBaseOffset base offset that is added to {@code offsetBytes} to form the read address
 * @param unsafeObj object handed to {@code unsafe.getByte} together with the computed address
 * @return on the all-ASCII path, the number of chars written. Otherwise the value returned by
 *     {@code getCharBufferNonAsciiCharsFromUtf8} minus {@code cbuf.arrayOffset()}.
 *     NOTE(review): confirm that the helper's return value makes both paths report the same
 *     quantity when the buffer's initial position is non-zero.
 */
private static int getCharBufferCharsFromUtf8(final long offsetBytes, final CharBuffer cbuf,
    final int utf8LengthBytes, final long cumBaseOffset, final Object unsafeObj) {
  final char[] dst = cbuf.array();
  final int arrOffset = cbuf.arrayOffset();
  final int firstCharIdx = cbuf.position() + arrOffset;
  final int charLimit = arrOffset + cbuf.limit();
  final long srcAddress = cumBaseOffset + offsetBytes;

  int charIdx = firstCharIdx; // next write index into dst
  int byteIdx = 0;            // next UTF-8 byte to read, relative to srcAddress

  // Fast path for pure-ASCII input. The loop is bounded by both the input length and the room
  // left in the buffer, so no per-char bounds check is needed inside it. It is deliberately kept
  // small and int-indexed: Hotspot JITs such loops well and does not insert safepoint polls on
  // each iteration.
  final int uncheckedEnd = Math.min(utf8LengthBytes, charLimit - firstCharIdx);
  while (byteIdx < uncheckedEnd) {
    final byte b = unsafe.getByte(unsafeObj, srcAddress + byteIdx);
    if (!DecodeUtil.isOneByte(b)) {
      break; // first non-ASCII byte
    }
    dst[charIdx++] = (char) b; // intentionally no CharBuffer bounds check here
    byteIdx++;
  }

  // Continue with any remaining single-byte chars, this time validating the write position
  // against the buffer limit before every store.
  while (byteIdx < utf8LengthBytes) {
    final byte b = unsafe.getByte(unsafeObj, srcAddress + byteIdx);
    if (!DecodeUtil.isOneByte(b)) {
      break;
    }
    checkCharBufferPos(cbuf, charIdx, charLimit);
    dst[charIdx++] = (char) b;
    byteIdx++;
  }

  if (byteIdx != utf8LengthBytes) {
    // A multi-byte sequence was encountered: delegate the remainder of the input.
    return getCharBufferNonAsciiCharsFromUtf8(cbuf, dst, charIdx, charLimit,
        srcAddress + byteIdx, srcAddress + utf8LengthBytes, unsafeObj, cumBaseOffset)
        - arrOffset;
  }

  // Entire input was ASCII: publish the new position and report how many chars were written.
  cbuf.position(charIdx - arrOffset);
  return charIdx - firstCharIdx;
}