public static int convertUTF8ToUTF16()

in java/fury-core/src/main/java/org/apache/fury/util/StringEncodingUtils.java [247:345]


  /**
   * Decodes the UTF-8 bytes in {@code src[offset, offset + len)} into UTF-16 code units stored at
   * the beginning of {@code dst}.
   *
   * <p>Runs of eight bytes whose high bits are all clear are copied as ASCII in one step; all
   * other input is decoded one sequence at a time. Overlong encodings, encoded surrogates,
   * truncated sequences, bad continuation bytes and code points outside the supplementary range
   * are treated as malformed.
   *
   * <p>This method performs no capacity check on {@code dst} of its own.
   *
   * @param src buffer holding the UTF-8 encoded bytes
   * @param offset index of the first byte to decode
   * @param len number of bytes to decode
   * @param dst destination for the decoded chars, filled from index 0
   * @return the number of chars written to {@code dst}, or {@code -1} if the input is malformed
   */
  public static int convertUTF8ToUTF16(byte[] src, int offset, int len, char[] dst) {
    // XOR-ing these with the shifted raw (sign-extended) bytes strips the UTF-8 marker bits of a
    // validated sequence in a single operation.
    final int twoByteFixup = ((byte) 0xC0 << 6) ^ ((byte) 0x80);
    final int threeByteFixup = ((byte) 0xE0 << 12) ^ ((byte) 0x80 << 6) ^ ((byte) 0x80);
    final int fourByteFixup =
        ((byte) 0xF0 << 18) ^ ((byte) 0x80 << 12) ^ ((byte) 0x80 << 6) ^ ((byte) 0x80);

    final int limit = offset + len;
    int written = 0;
    while (offset < limit) {
      // Fast path: the mask selects the top bit of each of the eight bytes in the long, so a zero
      // result means all eight are 7-bit ASCII.
      // NOTE(review): presumably Platform.getLong reads src[offset..offset + 7] as one long.
      boolean asciiRun =
          offset + 8 <= limit
              && (Platform.getLong(src, Platform.BYTE_ARRAY_OFFSET + offset) & 0x8080808080808080L)
                  == 0;
      if (asciiRun) {
        for (int i = 0; i < 8; i++) {
          dst[written + i] = (char) src[offset + i];
        }
        written += 8;
        offset += 8;
        continue;
      }

      final int lead = src[offset++];

      // 1 byte, 7 bits: 0xxxxxxx
      if (lead >= 0) {
        dst[written++] = (char) lead;
        continue;
      }

      // 2 bytes, 11 bits: 110xxxxx 10xxxxxx
      // (lead & 0x1e) == 0 would be 0xC0 / 0xC1, i.e. an overlong form; it falls through to the
      // final "return -1" because no later pattern matches it either.
      if ((lead >> 5) == -2 && (lead & 0x1e) != 0) {
        if (offset >= limit) {
          return -1;
        }
        final int c2 = src[offset++];
        if ((c2 & 0xc0) != 0x80) {
          return -1;
        }
        dst[written++] = (char) (((lead << 6) ^ c2) ^ twoByteFixup);
        continue;
      }

      // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
      if ((lead >> 4) == -2) {
        if (offset + 1 >= limit) {
          return -1;
        }
        final int c2 = src[offset];
        final int c3 = src[offset + 1];
        offset += 2;
        // 0xE0 followed by 0x80..0x9F encodes a value below 0x800 (overlong).
        boolean overlong = lead == (byte) 0xe0 && (c2 & 0xe0) == 0x80;
        boolean badContinuation = (c2 & 0xc0) != 0x80 || (c3 & 0xc0) != 0x80;
        if (overlong || badContinuation) {
          return -1;
        }
        final char decoded = (char) ((lead << 12) ^ (c2 << 6) ^ (c3 ^ threeByteFixup));
        // Surrogate code units must not appear as individually encoded 3-byte sequences.
        if (Character.isSurrogate(decoded)) {
          return -1;
        }
        dst[written++] = decoded;
        continue;
      }

      // 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      if ((lead >> 3) == -2) {
        if (offset + 2 >= limit) {
          return -1;
        }
        final int c2 = src[offset];
        final int c3 = src[offset + 1];
        final int c4 = src[offset + 2];
        offset += 3;
        final int codePoint = (lead << 18) ^ (c2 << 12) ^ (c3 << 6) ^ (c4 ^ fourByteFixup);
        boolean continuationsOk =
            (c2 & 0xc0) == 0x80 && (c3 & 0xc0) == 0x80 && (c4 & 0xc0) == 0x80;
        // The range check doubles as the shortest-form check: anything below 0x10000 is overlong
        // and anything above 0x10FFFF is not a Unicode code point.
        if (!continuationsOk || !Character.isSupplementaryCodePoint(codePoint)) {
          return -1;
        }
        dst[written] = Character.highSurrogate(codePoint);
        dst[written + 1] = Character.lowSurrogate(codePoint);
        written += 2;
        continue;
      }

      // Lead byte matches no valid UTF-8 pattern (stray continuation, 0xC0/0xC1, 0xF8..0xFF).
      return -1;
    }
    return written;
  }