private static byte bestCoder()

in java/fury-core/src/main/java/org/apache/fury/serializer/StringSerializer.java [870:923]


  /**
   * Chooses a coder for {@code chars} by inspecting at most its first 64 chars.
   *
   * <p>Two counters are collected over the sample: the number of chars below {@code 0x80} and the
   * number of chars not above {@code 0xFF}. The result is:
   *
   * <ul>
   *   <li>{@code LATIN1} when every char of the array was counted as not above {@code 0xFF}, or
   *       when the whole sample was and {@code StringUtils.isLatin(chars, sampleSize)} also
   *       returns true (presumably that call checks the chars after the sample; confirm against
   *       its definition);
   *   <li>{@code UTF8} when at least half of the sampled chars are below {@code 0x80};
   *   <li>{@code UTF16} otherwise.
   * </ul>
   *
   * @param chars the chars to classify; an empty array yields {@code LATIN1}
   * @return one of {@code LATIN1}, {@code UTF8} or {@code UTF16}
   */
  private static byte bestCoder(char[] chars) {
    final int totalChars = chars.length;
    final int sampleSize = Math.min(64, totalChars);
    // Largest multiple of 4 within the sample; this prefix is read one long (4 chars) at a time.
    final int wordChars = sampleSize & ~3;
    int ascii = 0;
    int latin1 = 0;

    for (int start = 0; start < wordChars; start += 4) {
      // Each char occupies 2 bytes, so the word for chars[start..start+3] begins 2*start bytes in.
      final long word = Platform.getLong(chars, Platform.CHAR_ARRAY_OFFSET + (start << 1));
      if ((word & MULTI_CHARS_NON_ASCII_MASK) == 0) {
        // No non-ASCII mask bit is set: all four chars count towards both totals.
        ascii += 4;
        latin1 += 4;
        continue;
      }
      final boolean allLatin1 = (word & MULTI_CHARS_NON_LATIN_MASK) == 0;
      if (allLatin1) {
        // No non-Latin mask bit is set: only the ASCII count needs a per-char check.
        latin1 += 4;
      }
      for (int idx = start; idx < start + 4; idx++) {
        final char c = chars[idx];
        if (c < 0x80) {
          ascii++;
          if (!allLatin1) {
            latin1++;
          }
        } else if (!allLatin1 && c <= 0xFF) {
          latin1++;
        }
      }
    }

    // Scalar pass over the 0-3 sampled chars that did not fill a whole word.
    int idx = wordChars;
    while (idx < sampleSize) {
      final char c = chars[idx++];
      if (c <= 0xFF) {
        latin1++;
        if (c < 0x80) {
          ascii++;
        }
      }
    }

    if (latin1 == totalChars) {
      return LATIN1;
    }
    if (latin1 == sampleSize && StringUtils.isLatin(chars, sampleSize)) {
      return LATIN1;
    }
    // At least half of the sample is ASCII: choose UTF-8, otherwise fall back to UTF-16.
    if (2 * ascii >= sampleSize) {
      return UTF8;
    }
    return UTF16;
  }