private static int getNonAsciiCharsFromUtf8()

in datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/Utf8.java [209:271]


  /**
   * Decodes the UTF-8 bytes read via {@code unsafe.getByte} from the half-open range
   * {@code [address, addressLimit)} and appends the decoded chars to {@code dst}.
   *
   * <p>One-, two- and three-byte sequences each add one to the returned count. Any other
   * lead byte is passed to {@code DecodeUtil.handleFourBytes} and adds two to the count
   * (presumably a surrogate pair -- confirm against DecodeUtil).
   *
   * <p>If a multi-byte sequence needs more trailing bytes than remain before
   * {@code addressLimit}, the exception produced by
   * {@code Utf8CodingException.shortUtf8DecodeByteSequence} is thrown.
   *
   * @param dst receives the decoded chars
   * @param address address of the first byte to decode
   * @param addressLimit address one past the last byte to decode
   * @param unsafeObj object handed to {@code unsafe.getByte} for every read
   * @param cumBaseOffset subtracted from {@code address} and {@code addressLimit}; used only
   *     to build the offset and limit reported when a sequence is cut short
   * @return the number of chars counted while decoding the range
   * @throws IOException if {@code dst.append} fails
   */
  private static int getNonAsciiCharsFromUtf8(final Appendable dst, long address,
      final long addressLimit, final Object unsafeObj, final long cumBaseOffset)
          throws IOException {
    int charCount = 0;

    while (address < addressLimit) {
      final byte lead = unsafe.getByte(unsafeObj, address);
      address++;

      if (DecodeUtil.isOneByte(lead)) {
        dst.append((char) lead);
        charCount++;
        // Single-byte chars tend to arrive in runs even inside non-ASCII text, so consume
        // the rest of the run here without going back through the outer dispatch.
        while (address < addressLimit) {
          final byte next = unsafe.getByte(unsafeObj, address);
          if (DecodeUtil.isOneByte(next)) {
            address++;
            dst.append((char) next);
            charCount++;
          } else {
            break;
          }
        }
        continue;
      }

      // Total length of the sequence introduced by the lead byte; anything that is not
      // classified as two or three bytes is treated as four.
      final int seqLen;
      if (DecodeUtil.isTwoBytes(lead)) {
        seqLen = 2;
      } else if (DecodeUtil.isThreeBytes(lead)) {
        seqLen = 3;
      } else {
        seqLen = 4;
      }

      // address now points at the first trailing byte and (seqLen - 1) of them are needed,
      // so the last one sits at address + (seqLen - 2) and must lie below addressLimit.
      if (address >= (addressLimit - (seqLen - 2))) {
        final long reportedOffset = address - cumBaseOffset;
        final long reportedLimit = addressLimit - cumBaseOffset;
        throw Utf8CodingException.shortUtf8DecodeByteSequence(
            lead, reportedOffset, reportedLimit, seqLen);
      }

      final byte second = unsafe.getByte(unsafeObj, address++);
      if (seqLen == 2) {
        DecodeUtil.handleTwoBytes(lead, second, dst);
        charCount++;
        continue;
      }

      final byte third = unsafe.getByte(unsafeObj, address++);
      if (seqLen == 3) {
        DecodeUtil.handleThreeBytes(lead, second, third, dst);
        charCount++;
        continue;
      }

      final byte fourth = unsafe.getByte(unsafeObj, address++);
      DecodeUtil.handleFourBytes(lead, second, third, fourth, dst);
      charCount += 2;
    }

    return charCount;
  }