function phutil_utf8_console_strlen()

in src/utils/utf8.php [284:327]


/**
 * Compute how many console columns a UTF-8 string occupies when printed.
 *
 * ANSI SGR escape sequences (colors, bold, and similar formatting) are
 * stripped before measuring, since they take up no columns. Pure ASCII
 * strings are measured with a plain byte count. Other strings are split into
 * glyphs (a character plus any combiners), and each glyph is measured by its
 * first codepoint: two columns if it falls in one of the East Asian wide or
 * fullwidth ranges listed below, one column otherwise. A glyph whose first
 * codepoint is 0 contributes no width.
 *
 * @param   string  UTF-8 string to measure.
 * @return  int     Width of the string, in console columns.
 */
function phutil_utf8_console_strlen($string) {
  // Formatting and colors don't contribute any width in the console. An SGR
  // sequence may carry several parameters separated by semicolons (for
  // example "\x1B[1;31m" for bold red), so allow ";" between the digits.
  $string = preg_replace("/\x1B\[[\d;]*m/", '', $string);

  // In the common case of an ASCII string, just return the string length.
  if (preg_match('/^[\x01-\x7F]*\z/', $string)) {
    return strlen($string);
  }

  $len = 0;

  // NOTE: To deal with combining characters, we're splitting the string into
  // glyphs first (characters with combiners) and then counting just the width
  // of the first character in each glyph.

  $display_glyphs = phutil_utf8v_combined($string);
  foreach ($display_glyphs as $display_glyph) {
    $glyph_codepoints = phutil_utf8v_codepoints($display_glyph);

    // Only the leading codepoint determines the width; combiners that follow
    // it are drawn in the same cell. reset() yields false for an empty list,
    // which is skipped here together with a NUL (0) codepoint.
    $c = reset($glyph_codepoints);
    if (!$c) {
      continue;
    }

    // Codepoint ranges which are rendered two columns wide.
    $is_wide =
      ($c >= 0x1100 &&
        ($c <= 0x115F ||                    /* Hangul Jamo init. consonants */
          $c == 0x2329 || $c == 0x232A ||   /* Angle brackets */
          ($c >= 0x2E80 && $c <= 0xA4CF &&
            $c != 0x303F) ||                /* CJK ... Yi */
          ($c >= 0xAC00 && $c <= 0xD7A3) || /* Hangul Syllables */
          ($c >= 0xF900 && $c <= 0xFAFF) || /* CJK Compatibility Ideographs */
          ($c >= 0xFE10 && $c <= 0xFE19) || /* Vertical forms */
          ($c >= 0xFE30 && $c <= 0xFE6F) || /* CJK Compatibility Forms */
          ($c >= 0xFF00 && $c <= 0xFF60) || /* Fullwidth Forms */
          ($c >= 0xFFE0 && $c <= 0xFFE6) || /* Fullwidth signs */
          ($c >= 0x20000 && $c <= 0x2FFFD) ||
          ($c >= 0x30000 && $c <= 0x3FFFD)));

    $len += $is_wide ? 2 : 1;
  }

  return $len;
}