static void expand()

in lucene/core/src/java/org/apache/lucene/util/automaton/CaseFolding.java [32:321]


  /**
   * Feeds {@code fn} the codepoint {@code c} together with its case variants.
   *
   * <p>Values are delivered in this order: {@code c} itself, then the result of {@link
   * Character#toUpperCase(int)} if it differs from {@code c}, then the result of {@link
   * Character#toLowerCase(int)} if it differs from {@code c}, and finally any additional variants
   * hard-coded for {@code c} in the switch below. No de-duplication is done here; every value is
   * passed straight to {@code fn}.
   *
   * @param c codepoint to expand
   * @param fn callback invoked once per emitted codepoint
   */
  static void expand(int c, IntConsumer fn) {
    // the codepoint itself always comes first
    fn.accept(c);

    // simple one-to-one mappings from the JDK character tables
    final int upperCase = Character.toUpperCase(c);
    final int lowerCase = Character.toLowerCase(c);
    if (upperCase != c) {
      fn.accept(upperCase);
    }
    if (lowerCase != c) {
      fn.accept(lowerCase);
    }

    // extra variants on top of the table mappings above;
    // codepoints that emit exactly the same variants share a single branch
    switch (c) {
      case 0x004B: // LATIN CAPITAL LETTER K
      case 0x006B: // LATIN SMALL LETTER K
        fn.accept(0x212A); // KELVIN SIGN
        break;
      case 0x0053: // LATIN CAPITAL LETTER S
      case 0x0073: // LATIN SMALL LETTER S
        fn.accept(0x017F); // LATIN SMALL LETTER LONG S
        break;
      case 0x00B5: // MICRO SIGN
        fn.accept(0x03BC); // GREEK SMALL LETTER MU
        break;
      case 0x00C5: // LATIN CAPITAL LETTER A WITH RING ABOVE
      case 0x00E5: // LATIN SMALL LETTER A WITH RING ABOVE
        fn.accept(0x212B); // ANGSTROM SIGN
        break;
      case 0x00DF: // LATIN SMALL LETTER SHARP S
        fn.accept(0x1E9E); // LATIN CAPITAL LETTER SHARP S
        break;
      case 0x017F: // LATIN SMALL LETTER LONG S
        fn.accept(0x0073); // LATIN SMALL LETTER S
        break;
      case 0x01C4: // LATIN CAPITAL LETTER DZ WITH CARON
      case 0x01C6: // LATIN SMALL LETTER DZ WITH CARON
        fn.accept(0x01C5); // LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
        break;
      case 0x01C7: // LATIN CAPITAL LETTER LJ
      case 0x01C9: // LATIN SMALL LETTER LJ
        fn.accept(0x01C8); // LATIN CAPITAL LETTER L WITH SMALL LETTER J
        break;
      case 0x01CA: // LATIN CAPITAL LETTER NJ
      case 0x01CC: // LATIN SMALL LETTER NJ
        fn.accept(0x01CB); // LATIN CAPITAL LETTER N WITH SMALL LETTER J
        break;
      case 0x01F1: // LATIN CAPITAL LETTER DZ
      case 0x01F3: // LATIN SMALL LETTER DZ
        fn.accept(0x01F2); // LATIN CAPITAL LETTER D WITH SMALL LETTER Z
        break;
      case 0x0345: // COMBINING GREEK YPOGEGRAMMENI
        fn.accept(0x03B9); // GREEK SMALL LETTER IOTA
        fn.accept(0x1FBE); // GREEK PROSGEGRAMMENI
        break;
      case 0x0390: // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
        fn.accept(0x1FD3); // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
        break;
      case 0x0392: // GREEK CAPITAL LETTER BETA
      case 0x03B2: // GREEK SMALL LETTER BETA
        fn.accept(0x03D0); // GREEK BETA SYMBOL
        break;
      case 0x0395: // GREEK CAPITAL LETTER EPSILON
      case 0x03B5: // GREEK SMALL LETTER EPSILON
        fn.accept(0x03F5); // GREEK LUNATE EPSILON SYMBOL
        break;
      case 0x0398: // GREEK CAPITAL LETTER THETA
      case 0x03B8: // GREEK SMALL LETTER THETA
        fn.accept(0x03D1); // GREEK THETA SYMBOL
        fn.accept(0x03F4); // GREEK CAPITAL THETA SYMBOL
        break;
      case 0x0399: // GREEK CAPITAL LETTER IOTA
      case 0x03B9: // GREEK SMALL LETTER IOTA
        fn.accept(0x0345); // COMBINING GREEK YPOGEGRAMMENI
        fn.accept(0x1FBE); // GREEK PROSGEGRAMMENI
        break;
      case 0x039A: // GREEK CAPITAL LETTER KAPPA
      case 0x03BA: // GREEK SMALL LETTER KAPPA
        fn.accept(0x03F0); // GREEK KAPPA SYMBOL
        break;
      case 0x039C: // GREEK CAPITAL LETTER MU
      case 0x03BC: // GREEK SMALL LETTER MU
        fn.accept(0x00B5); // MICRO SIGN
        break;
      case 0x03A0: // GREEK CAPITAL LETTER PI
      case 0x03C0: // GREEK SMALL LETTER PI
        fn.accept(0x03D6); // GREEK PI SYMBOL
        break;
      case 0x03A1: // GREEK CAPITAL LETTER RHO
      case 0x03C1: // GREEK SMALL LETTER RHO
        fn.accept(0x03F1); // GREEK RHO SYMBOL
        break;
      case 0x03A3: // GREEK CAPITAL LETTER SIGMA
      case 0x03C3: // GREEK SMALL LETTER SIGMA
        fn.accept(0x03C2); // GREEK SMALL LETTER FINAL SIGMA
        break;
      case 0x03A6: // GREEK CAPITAL LETTER PHI
      case 0x03C6: // GREEK SMALL LETTER PHI
        fn.accept(0x03D5); // GREEK PHI SYMBOL
        break;
      case 0x03A9: // GREEK CAPITAL LETTER OMEGA
      case 0x03C9: // GREEK SMALL LETTER OMEGA
        fn.accept(0x2126); // OHM SIGN
        break;
      case 0x03B0: // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
        fn.accept(0x1FE3); // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
        break;
      case 0x03C2: // GREEK SMALL LETTER FINAL SIGMA
        fn.accept(0x03C3); // GREEK SMALL LETTER SIGMA
        break;
      case 0x03D0: // GREEK BETA SYMBOL
        fn.accept(0x03B2); // GREEK SMALL LETTER BETA
        break;
      case 0x03D1: // GREEK THETA SYMBOL
        fn.accept(0x03B8); // GREEK SMALL LETTER THETA
        fn.accept(0x03F4); // GREEK CAPITAL THETA SYMBOL
        break;
      case 0x03D5: // GREEK PHI SYMBOL
        fn.accept(0x03C6); // GREEK SMALL LETTER PHI
        break;
      case 0x03D6: // GREEK PI SYMBOL
        fn.accept(0x03C0); // GREEK SMALL LETTER PI
        break;
      case 0x03F0: // GREEK KAPPA SYMBOL
        fn.accept(0x03BA); // GREEK SMALL LETTER KAPPA
        break;
      case 0x03F1: // GREEK RHO SYMBOL
        fn.accept(0x03C1); // GREEK SMALL LETTER RHO
        break;
      case 0x03F4: // GREEK CAPITAL THETA SYMBOL
        fn.accept(0x0398); // GREEK CAPITAL LETTER THETA
        fn.accept(0x03D1); // GREEK THETA SYMBOL
        break;
      case 0x03F5: // GREEK LUNATE EPSILON SYMBOL
        fn.accept(0x03B5); // GREEK SMALL LETTER EPSILON
        break;
      case 0x0412: // CYRILLIC CAPITAL LETTER VE
      case 0x0432: // CYRILLIC SMALL LETTER VE
        fn.accept(0x1C80); // CYRILLIC SMALL LETTER ROUNDED VE
        break;
      case 0x0414: // CYRILLIC CAPITAL LETTER DE
      case 0x0434: // CYRILLIC SMALL LETTER DE
        fn.accept(0x1C81); // CYRILLIC SMALL LETTER LONG-LEGGED DE
        break;
      case 0x041E: // CYRILLIC CAPITAL LETTER O
      case 0x043E: // CYRILLIC SMALL LETTER O
        fn.accept(0x1C82); // CYRILLIC SMALL LETTER NARROW O
        break;
      case 0x0421: // CYRILLIC CAPITAL LETTER ES
      case 0x0441: // CYRILLIC SMALL LETTER ES
        fn.accept(0x1C83); // CYRILLIC SMALL LETTER WIDE ES
        break;
      case 0x0422: // CYRILLIC CAPITAL LETTER TE
      case 0x0442: // CYRILLIC SMALL LETTER TE
        fn.accept(0x1C84); // CYRILLIC SMALL LETTER TALL TE
        fn.accept(0x1C85); // CYRILLIC SMALL LETTER THREE-LEGGED TE
        break;
      case 0x042A: // CYRILLIC CAPITAL LETTER HARD SIGN
      case 0x044A: // CYRILLIC SMALL LETTER HARD SIGN
        fn.accept(0x1C86); // CYRILLIC SMALL LETTER TALL HARD SIGN
        break;
      case 0x0462: // CYRILLIC CAPITAL LETTER YAT
      case 0x0463: // CYRILLIC SMALL LETTER YAT
        fn.accept(0x1C87); // CYRILLIC SMALL LETTER TALL YAT
        break;
      case 0x1C80: // CYRILLIC SMALL LETTER ROUNDED VE
        fn.accept(0x0432); // CYRILLIC SMALL LETTER VE
        break;
      case 0x1C81: // CYRILLIC SMALL LETTER LONG-LEGGED DE
        fn.accept(0x0434); // CYRILLIC SMALL LETTER DE
        break;
      case 0x1C82: // CYRILLIC SMALL LETTER NARROW O
        fn.accept(0x043E); // CYRILLIC SMALL LETTER O
        break;
      case 0x1C83: // CYRILLIC SMALL LETTER WIDE ES
        fn.accept(0x0441); // CYRILLIC SMALL LETTER ES
        break;
      case 0x1C84: // CYRILLIC SMALL LETTER TALL TE
        fn.accept(0x0442); // CYRILLIC SMALL LETTER TE
        fn.accept(0x1C85); // CYRILLIC SMALL LETTER THREE-LEGGED TE
        break;
      case 0x1C85: // CYRILLIC SMALL LETTER THREE-LEGGED TE
        fn.accept(0x0442); // CYRILLIC SMALL LETTER TE
        fn.accept(0x1C84); // CYRILLIC SMALL LETTER TALL TE
        break;
      case 0x1C86: // CYRILLIC SMALL LETTER TALL HARD SIGN
        fn.accept(0x044A); // CYRILLIC SMALL LETTER HARD SIGN
        break;
      case 0x1C87: // CYRILLIC SMALL LETTER TALL YAT
        fn.accept(0x0463); // CYRILLIC SMALL LETTER YAT
        break;
      case 0x1C88: // CYRILLIC SMALL LETTER UNBLENDED UK
        fn.accept(0xA64B); // CYRILLIC SMALL LETTER MONOGRAPH UK
        break;
      case 0x1E60: // LATIN CAPITAL LETTER S WITH DOT ABOVE
      case 0x1E61: // LATIN SMALL LETTER S WITH DOT ABOVE
        fn.accept(0x1E9B); // LATIN SMALL LETTER LONG S WITH DOT ABOVE
        break;
      case 0x1E9B: // LATIN SMALL LETTER LONG S WITH DOT ABOVE
        fn.accept(0x1E61); // LATIN SMALL LETTER S WITH DOT ABOVE
        break;
      case 0x1FBE: // GREEK PROSGEGRAMMENI
        fn.accept(0x0345); // COMBINING GREEK YPOGEGRAMMENI
        fn.accept(0x03B9); // GREEK SMALL LETTER IOTA
        break;
      case 0x1FD3: // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
        fn.accept(0x0390); // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
        break;
      case 0x1FE3: // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
        fn.accept(0x03B0); // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
        break;
      case 0x2126: // OHM SIGN
        fn.accept(0x03A9); // GREEK CAPITAL LETTER OMEGA
        break;
      case 0x212A: // KELVIN SIGN
        fn.accept(0x004B); // LATIN CAPITAL LETTER K
        break;
      case 0x212B: // ANGSTROM SIGN
        fn.accept(0x00C5); // LATIN CAPITAL LETTER A WITH RING ABOVE
        break;
      case 0xA64A: // CYRILLIC CAPITAL LETTER MONOGRAPH UK
      case 0xA64B: // CYRILLIC SMALL LETTER MONOGRAPH UK
        fn.accept(0x1C88); // CYRILLIC SMALL LETTER UNBLENDED UK
        break;
      case 0xFB05: // LATIN SMALL LIGATURE LONG S T
        fn.accept(0xFB06); // LATIN SMALL LIGATURE ST
        break;
      case 0xFB06: // LATIN SMALL LIGATURE ST
        fn.accept(0xFB05); // LATIN SMALL LIGATURE LONG S T
        break;
    }
  }