private static int getCharBufferCharsFromUtf8()

in datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/Utf8.java [93:131]


  /**
   * Decodes UTF-8 bytes, read through {@code unsafe} starting at
   * {@code cumBaseOffset + offsetBytes}, directly into the backing array of {@code cbuf}.
   *
   * <p>A leading run of one-byte (ASCII) sequences is copied here. If the whole input is
   * one-byte sequences, the buffer position is advanced past the written chars and the number
   * of chars written is returned. Otherwise the remaining bytes are handed to
   * {@code getCharBufferNonAsciiCharsFromUtf8}.
   *
   * <p>NOTE(review): on the non-ASCII path the returned value is the helper's result minus
   * {@code cbuf.arrayOffset()}, whereas the all-ASCII path returns the end index minus the
   * start index. Whether both are "chars decoded" depends on what the helper returns, which is
   * not visible here - confirm.
   *
   * @param offsetBytes offset added to {@code cumBaseOffset} to form the first source address
   * @param cbuf destination buffer; its backing array is accessed via {@code array()}
   * @param utf8LengthBytes number of source bytes to decode
   * @param cumBaseOffset base offset added to {@code offsetBytes}
   * @param unsafeObj object passed to {@code unsafe.getByte} along with each address
   * @return for all-ASCII input, the number of chars written; otherwise the result of the
   *     non-ASCII helper minus the buffer's array offset
   */
  private static int getCharBufferCharsFromUtf8(final long offsetBytes, final CharBuffer cbuf,
        final int utf8LengthBytes, final long cumBaseOffset, final Object unsafeObj) {
    final char[] dstChars = cbuf.array();
    final int arrOffset = cbuf.arrayOffset();
    // All char indices below are absolute indices into dstChars, not buffer-relative positions.
    final int firstCharIdx = cbuf.position() + arrOffset;
    final int charLimitIdx = arrOffset + cbuf.limit();
    final long srcAddress = cumBaseOffset + offsetBytes;

    int charIdx = firstCharIdx;
    int byteIdx = 0;

    // Fast path: copy one-byte sequences without per-char bounds checks. The iteration count is
    // capped by whichever is smaller, the source length or the room left before the limit, so
    // the write index cannot pass charLimitIdx inside this loop.
    final int roomLeft = charLimitIdx - charIdx;
    final int uncheckedEnd = Math.min(utf8LengthBytes, roomLeft);
    // Deliberately an int-indexed loop: per the original author it is faster under the Hotspot
    // JIT because no safepoint poll is inserted on each iteration.
    for (; byteIdx < uncheckedEnd; byteIdx++) {
      final byte cur = unsafe.getByte(unsafeObj, srcAddress + byteIdx);
      if (!DecodeUtil.isOneByte(cur)) {
        break;
      }
      dstChars[charIdx++] = (char) cur;
    }

    // Slow path: continues from where the fast loop stopped. If that was a multi-byte lead byte,
    // this loop re-reads it and exits immediately; if the fast loop ran out of unchecked room,
    // every further one-byte char goes through checkCharBufferPos before being stored.
    for (; byteIdx < utf8LengthBytes; byteIdx++) {
      final byte cur = unsafe.getByte(unsafeObj, srcAddress + byteIdx);
      if (!DecodeUtil.isOneByte(cur)) {
        break;
      }
      checkCharBufferPos(cbuf, charIdx, charLimitIdx);
      dstChars[charIdx++] = (char) cur;
    }

    if (byteIdx != utf8LengthBytes) {
      // Input not fully consumed by the one-byte loops: delegate the rest to the general decoder.
      final long resumeAddress = srcAddress + byteIdx;
      final long endAddress = srcAddress + utf8LengthBytes;
      final int helperResult = getCharBufferNonAsciiCharsFromUtf8(cbuf, dstChars, charIdx,
          charLimitIdx, resumeAddress, endAddress, unsafeObj, cumBaseOffset);
      return helperResult - arrOffset;
    }

    // Entire input was one-byte sequences: publish the new position (buffer-relative).
    cbuf.position(charIdx - arrOffset);
    return charIdx - firstCharIdx;
  }