in lucene/core/src/java/org/apache/lucene/util/automaton/CaseFolding.java [32:321]
/**
 * Feeds {@code fn} with code point {@code c} and every code point this method treats as a
 * case-insensitive alternative of it.
 *
 * <p>Values are emitted in a fixed order: {@code c} itself, then its upper-case form from
 * {@link Character#toUpperCase(int)} if that differs from {@code c}, then its lower-case form
 * from {@link Character#toLowerCase(int)} if that differs from {@code c}, and finally any
 * variants from the hard-coded table below.
 *
 * @param c the code point to expand
 * @param fn receives each resulting code point, one call per value
 */
static void expand(int c, IntConsumer fn) {
  // the code point always matches itself
  fn.accept(c);

  // simple upper-case mapping from the JDK tables
  final int upperCased = Character.toUpperCase(c);
  if (c != upperCased) {
    fn.accept(upperCased);
  }

  // simple lower-case mapping from the JDK tables
  final int lowerCased = Character.toLowerCase(c);
  if (c != lowerCased) {
    fn.accept(lowerCased);
  }

  // additional variants listed explicitly; labels that share the same targets are grouped
  switch (c) {
    case 0x004B: // LATIN CAPITAL LETTER K
    case 0x006B: // LATIN SMALL LETTER K
      fn.accept(0x212A); // KELVIN SIGN
      break;

    case 0x0053: // LATIN CAPITAL LETTER S
    case 0x0073: // LATIN SMALL LETTER S
      fn.accept(0x017F); // LATIN SMALL LETTER LONG S
      break;

    case 0x00B5: // MICRO SIGN
      fn.accept(0x03BC); // GREEK SMALL LETTER MU
      break;

    case 0x00C5: // LATIN CAPITAL LETTER A WITH RING ABOVE
    case 0x00E5: // LATIN SMALL LETTER A WITH RING ABOVE
      fn.accept(0x212B); // ANGSTROM SIGN
      break;

    case 0x00DF: // LATIN SMALL LETTER SHARP S
      fn.accept(0x1E9E); // LATIN CAPITAL LETTER SHARP S
      break;

    case 0x017F: // LATIN SMALL LETTER LONG S
      fn.accept(0x0073); // LATIN SMALL LETTER S
      break;

    case 0x01C4: // LATIN CAPITAL LETTER DZ WITH CARON
    case 0x01C6: // LATIN SMALL LETTER DZ WITH CARON
      fn.accept(0x01C5); // LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
      break;

    case 0x01C7: // LATIN CAPITAL LETTER LJ
    case 0x01C9: // LATIN SMALL LETTER LJ
      fn.accept(0x01C8); // LATIN CAPITAL LETTER L WITH SMALL LETTER J
      break;

    case 0x01CA: // LATIN CAPITAL LETTER NJ
    case 0x01CC: // LATIN SMALL LETTER NJ
      fn.accept(0x01CB); // LATIN CAPITAL LETTER N WITH SMALL LETTER J
      break;

    case 0x01F1: // LATIN CAPITAL LETTER DZ
    case 0x01F3: // LATIN SMALL LETTER DZ
      fn.accept(0x01F2); // LATIN CAPITAL LETTER D WITH SMALL LETTER Z
      break;

    case 0x0345: // COMBINING GREEK YPOGEGRAMMENI
      fn.accept(0x03B9); // GREEK SMALL LETTER IOTA
      fn.accept(0x1FBE); // GREEK PROSGEGRAMMENI
      break;

    case 0x0390: // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
      fn.accept(0x1FD3); // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
      break;

    case 0x0392: // GREEK CAPITAL LETTER BETA
    case 0x03B2: // GREEK SMALL LETTER BETA
      fn.accept(0x03D0); // GREEK BETA SYMBOL
      break;

    case 0x0395: // GREEK CAPITAL LETTER EPSILON
    case 0x03B5: // GREEK SMALL LETTER EPSILON
      fn.accept(0x03F5); // GREEK LUNATE EPSILON SYMBOL
      break;

    case 0x0398: // GREEK CAPITAL LETTER THETA
    case 0x03B8: // GREEK SMALL LETTER THETA
      fn.accept(0x03D1); // GREEK THETA SYMBOL
      fn.accept(0x03F4); // GREEK CAPITAL THETA SYMBOL
      break;

    case 0x0399: // GREEK CAPITAL LETTER IOTA
    case 0x03B9: // GREEK SMALL LETTER IOTA
      fn.accept(0x0345); // COMBINING GREEK YPOGEGRAMMENI
      fn.accept(0x1FBE); // GREEK PROSGEGRAMMENI
      break;

    case 0x039A: // GREEK CAPITAL LETTER KAPPA
    case 0x03BA: // GREEK SMALL LETTER KAPPA
      fn.accept(0x03F0); // GREEK KAPPA SYMBOL
      break;

    case 0x039C: // GREEK CAPITAL LETTER MU
    case 0x03BC: // GREEK SMALL LETTER MU
      fn.accept(0x00B5); // MICRO SIGN
      break;

    case 0x03A0: // GREEK CAPITAL LETTER PI
    case 0x03C0: // GREEK SMALL LETTER PI
      fn.accept(0x03D6); // GREEK PI SYMBOL
      break;

    case 0x03A1: // GREEK CAPITAL LETTER RHO
    case 0x03C1: // GREEK SMALL LETTER RHO
      fn.accept(0x03F1); // GREEK RHO SYMBOL
      break;

    case 0x03A3: // GREEK CAPITAL LETTER SIGMA
    case 0x03C3: // GREEK SMALL LETTER SIGMA
      fn.accept(0x03C2); // GREEK SMALL LETTER FINAL SIGMA
      break;

    case 0x03A6: // GREEK CAPITAL LETTER PHI
    case 0x03C6: // GREEK SMALL LETTER PHI
      fn.accept(0x03D5); // GREEK PHI SYMBOL
      break;

    case 0x03A9: // GREEK CAPITAL LETTER OMEGA
    case 0x03C9: // GREEK SMALL LETTER OMEGA
      fn.accept(0x2126); // OHM SIGN
      break;

    case 0x03B0: // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
      fn.accept(0x1FE3); // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
      break;

    case 0x03C2: // GREEK SMALL LETTER FINAL SIGMA
      fn.accept(0x03C3); // GREEK SMALL LETTER SIGMA
      break;

    case 0x03D0: // GREEK BETA SYMBOL
      fn.accept(0x03B2); // GREEK SMALL LETTER BETA
      break;

    case 0x03D1: // GREEK THETA SYMBOL
      fn.accept(0x03B8); // GREEK SMALL LETTER THETA
      fn.accept(0x03F4); // GREEK CAPITAL THETA SYMBOL
      break;

    case 0x03D5: // GREEK PHI SYMBOL
      fn.accept(0x03C6); // GREEK SMALL LETTER PHI
      break;

    case 0x03D6: // GREEK PI SYMBOL
      fn.accept(0x03C0); // GREEK SMALL LETTER PI
      break;

    case 0x03F0: // GREEK KAPPA SYMBOL
      fn.accept(0x03BA); // GREEK SMALL LETTER KAPPA
      break;

    case 0x03F1: // GREEK RHO SYMBOL
      fn.accept(0x03C1); // GREEK SMALL LETTER RHO
      break;

    case 0x03F4: // GREEK CAPITAL THETA SYMBOL
      fn.accept(0x0398); // GREEK CAPITAL LETTER THETA
      fn.accept(0x03D1); // GREEK THETA SYMBOL
      break;

    case 0x03F5: // GREEK LUNATE EPSILON SYMBOL
      fn.accept(0x03B5); // GREEK SMALL LETTER EPSILON
      break;

    case 0x0412: // CYRILLIC CAPITAL LETTER VE
    case 0x0432: // CYRILLIC SMALL LETTER VE
      fn.accept(0x1C80); // CYRILLIC SMALL LETTER ROUNDED VE
      break;

    case 0x0414: // CYRILLIC CAPITAL LETTER DE
    case 0x0434: // CYRILLIC SMALL LETTER DE
      fn.accept(0x1C81); // CYRILLIC SMALL LETTER LONG-LEGGED DE
      break;

    case 0x041E: // CYRILLIC CAPITAL LETTER O
    case 0x043E: // CYRILLIC SMALL LETTER O
      fn.accept(0x1C82); // CYRILLIC SMALL LETTER NARROW O
      break;

    case 0x0421: // CYRILLIC CAPITAL LETTER ES
    case 0x0441: // CYRILLIC SMALL LETTER ES
      fn.accept(0x1C83); // CYRILLIC SMALL LETTER WIDE ES
      break;

    case 0x0422: // CYRILLIC CAPITAL LETTER TE
    case 0x0442: // CYRILLIC SMALL LETTER TE
      fn.accept(0x1C84); // CYRILLIC SMALL LETTER TALL TE
      fn.accept(0x1C85); // CYRILLIC SMALL LETTER THREE-LEGGED TE
      break;

    case 0x042A: // CYRILLIC CAPITAL LETTER HARD SIGN
    case 0x044A: // CYRILLIC SMALL LETTER HARD SIGN
      fn.accept(0x1C86); // CYRILLIC SMALL LETTER TALL HARD SIGN
      break;

    case 0x0462: // CYRILLIC CAPITAL LETTER YAT
    case 0x0463: // CYRILLIC SMALL LETTER YAT
      fn.accept(0x1C87); // CYRILLIC SMALL LETTER TALL YAT
      break;

    case 0x1C80: // CYRILLIC SMALL LETTER ROUNDED VE
      fn.accept(0x0432); // CYRILLIC SMALL LETTER VE
      break;

    case 0x1C81: // CYRILLIC SMALL LETTER LONG-LEGGED DE
      fn.accept(0x0434); // CYRILLIC SMALL LETTER DE
      break;

    case 0x1C82: // CYRILLIC SMALL LETTER NARROW O
      fn.accept(0x043E); // CYRILLIC SMALL LETTER O
      break;

    case 0x1C83: // CYRILLIC SMALL LETTER WIDE ES
      fn.accept(0x0441); // CYRILLIC SMALL LETTER ES
      break;

    case 0x1C84: // CYRILLIC SMALL LETTER TALL TE
      fn.accept(0x0442); // CYRILLIC SMALL LETTER TE
      fn.accept(0x1C85); // CYRILLIC SMALL LETTER THREE-LEGGED TE
      break;

    case 0x1C85: // CYRILLIC SMALL LETTER THREE-LEGGED TE
      fn.accept(0x0442); // CYRILLIC SMALL LETTER TE
      fn.accept(0x1C84); // CYRILLIC SMALL LETTER TALL TE
      break;

    case 0x1C86: // CYRILLIC SMALL LETTER TALL HARD SIGN
      fn.accept(0x044A); // CYRILLIC SMALL LETTER HARD SIGN
      break;

    case 0x1C87: // CYRILLIC SMALL LETTER TALL YAT
      fn.accept(0x0463); // CYRILLIC SMALL LETTER YAT
      break;

    case 0x1C88: // CYRILLIC SMALL LETTER UNBLENDED UK
      fn.accept(0xA64B); // CYRILLIC SMALL LETTER MONOGRAPH UK
      break;

    case 0x1E60: // LATIN CAPITAL LETTER S WITH DOT ABOVE
    case 0x1E61: // LATIN SMALL LETTER S WITH DOT ABOVE
      fn.accept(0x1E9B); // LATIN SMALL LETTER LONG S WITH DOT ABOVE
      break;

    case 0x1E9B: // LATIN SMALL LETTER LONG S WITH DOT ABOVE
      fn.accept(0x1E61); // LATIN SMALL LETTER S WITH DOT ABOVE
      break;

    case 0x1FBE: // GREEK PROSGEGRAMMENI
      fn.accept(0x0345); // COMBINING GREEK YPOGEGRAMMENI
      fn.accept(0x03B9); // GREEK SMALL LETTER IOTA
      break;

    case 0x1FD3: // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
      fn.accept(0x0390); // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
      break;

    case 0x1FE3: // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
      fn.accept(0x03B0); // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
      break;

    case 0x2126: // OHM SIGN
      fn.accept(0x03A9); // GREEK CAPITAL LETTER OMEGA
      break;

    case 0x212A: // KELVIN SIGN
      fn.accept(0x004B); // LATIN CAPITAL LETTER K
      break;

    case 0x212B: // ANGSTROM SIGN
      fn.accept(0x00C5); // LATIN CAPITAL LETTER A WITH RING ABOVE
      break;

    case 0xA64A: // CYRILLIC CAPITAL LETTER MONOGRAPH UK
    case 0xA64B: // CYRILLIC SMALL LETTER MONOGRAPH UK
      fn.accept(0x1C88); // CYRILLIC SMALL LETTER UNBLENDED UK
      break;

    case 0xFB05: // LATIN SMALL LIGATURE LONG S T
      fn.accept(0xFB06); // LATIN SMALL LIGATURE ST
      break;

    case 0xFB06: // LATIN SMALL LIGATURE ST
      fn.accept(0xFB05); // LATIN SMALL LIGATURE LONG S T
      break;

    default:
      // no table entry for this code point
      break;
  }
}