private static int getCharBufferNonAsciiCharsFromUtf8()

in datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/Utf8.java [133:205]


  /**
   * Decodes the UTF-8 bytes found in {@code [address, addressLimit)} into chars that are stored
   * directly in {@code carr}, starting at index {@code cpos}.
   *
   * <p>Each lead byte is classified through {@code DecodeUtil}: one-byte values are cast to
   * {@code char} and stored in place, while two-, three- and four-byte sequences are handed to the
   * matching {@code DecodeUtil.handle*CharBuffer} routine. Any lead byte that is not classified as
   * one, two or three bytes is treated as the start of a four-byte sequence, which advances
   * {@code cpos} by two; every other sequence advances it by one.
   *
   * <p>The position of {@code cbuf} is set to {@code cpos - cbuf.arrayOffset()} on normal
   * completion and immediately before each exception this method throws itself.
   * NOTE(review): this presumes {@code carr} is the backing array of {@code cbuf}; confirm at the
   * call site.
   *
   * <p>If the input ends before a multi-byte sequence is complete, the exception built by
   * {@code Utf8CodingException.shortUtf8DecodeByteSequence} is thrown. The offsets passed to it
   * are relative to {@code cumBaseOffset}.
   *
   * @param cbuf the buffer whose position is kept in step with {@code cpos}
   * @param carr the char array receiving the decoded chars
   * @param cpos the index in {@code carr} of the next char to write
   * @param clim the exclusive upper bound for {@code cpos}
   * @param address the address of the next byte to read
   * @param addressLimit the exclusive end address of the input
   * @param unsafeObj the base object passed to {@code unsafe.getByte}
   * @param cumBaseOffset the value subtracted from addresses to form the offsets reported on a
   *     truncated sequence
   * @return the value of {@code cpos} after the last char written
   * @throws BufferOverflowException if a four-byte sequence is met with fewer than two positions
   *     left before {@code clim}; the capacity check for all other sequences is delegated to
   *     {@code checkCharBufferPos}
   */
  private static int getCharBufferNonAsciiCharsFromUtf8(final CharBuffer cbuf, final char[] carr,
      int cpos, final int clim, long address, final long addressLimit, final Object unsafeObj,
      final long cumBaseOffset) {

    while (address < addressLimit) {
      final byte lead = unsafe.getByte(unsafeObj, address++);

      if (DecodeUtil.isOneByte(lead)) {
        checkCharBufferPos(cbuf, cpos, clim);
        carr[cpos++] = (char) lead;
        // One-byte chars usually arrive in runs, so drain the rest of the run in a tight loop.
        // The next byte is only peeked at: a multi-byte lead must be left for the outer loop.
        while (address < addressLimit) {
          final byte peeked = unsafe.getByte(unsafeObj, address);
          if (!DecodeUtil.isOneByte(peeked)) {
            break;
          }
          address++;
          checkCharBufferPos(cbuf, cpos, clim);
          carr[cpos++] = (char) peeked;
        }
        continue;
      }

      // Multi-byte sequence: whatever is neither two nor three bytes long is handled as four.
      final int seqLen;
      if (DecodeUtil.isTwoBytes(lead)) {
        seqLen = 2;
      } else if (DecodeUtil.isThreeBytes(lead)) {
        seqLen = 3;
      } else {
        seqLen = 4;
      }

      // The lead byte is already consumed, so (seqLen - 1) more bytes must lie below addressLimit.
      if (address >= (addressLimit - (seqLen - 2))) {
        cbuf.position(cpos - cbuf.arrayOffset());
        final long off = address - cumBaseOffset;
        final long limit = addressLimit - cumBaseOffset;
        throw Utf8CodingException.shortUtf8DecodeByteSequence(lead, off, limit, seqLen);
      }

      if (seqLen == 4) {
        // A four-byte sequence occupies two chars, so two free positions are required.
        if (cpos >= (clim - 1)) {
          cbuf.position(cpos - cbuf.arrayOffset());
          throw new BufferOverflowException();
        }
        final byte second = unsafe.getByte(unsafeObj, address++);
        final byte third = unsafe.getByte(unsafeObj, address++);
        final byte fourth = unsafe.getByte(unsafeObj, address++);
        DecodeUtil.handleFourBytesCharBuffer(lead, second, third, fourth, cbuf, carr, cpos);
        cpos += 2;
      } else {
        checkCharBufferPos(cbuf, cpos, clim);
        final byte second = unsafe.getByte(unsafeObj, address++);
        if (seqLen == 2) {
          DecodeUtil.handleTwoBytesCharBuffer(lead, second, cbuf, carr, cpos);
        } else {
          final byte third = unsafe.getByte(unsafeObj, address++);
          DecodeUtil.handleThreeBytesCharBuffer(lead, second, third, cbuf, carr, cpos);
        }
        cpos++;
      }
    }

    cbuf.position(cpos - cbuf.arrayOffset());
    return cpos;
  }