in tantivy/src/tokenizer/ascii_folding_filter.rs [1635:4046]
fn test_all_foldings() {
// those folding is a copy of
// https://github.com/apache/lucene-solr/blob/28d187acd1e391723eb6e1b5445f22abf5580a80/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestASCIIFoldingFilter.java
// useful regex to adapt to a Rust structure:
// 1. Preg and replace folded:
// - **REGEX** |,"(.){3,5}", // Folded result|
// - **REPLACEMENT** ], "$1".to_string(), ), ( vec![
// 2. Preg and replace characters:
// - **REGEX** |[\+]{0,1} "(.{1,3})" // U\+|
// - **REPLACEMENT** "$1", // U+
let foldings: Vec<(&[&str], &str)> = vec![
(
&[
"À", // U+00C0: LATIN CAPITAL LETTER A WITH GRAVE
"Á", // U+00C1: LATIN CAPITAL LETTER A WITH ACUTE
"Â", // U+00C2: LATIN CAPITAL LETTER A WITH CIRCUMFLEX
"Ã", // U+00C3: LATIN CAPITAL LETTER A WITH TILDE
"Ä", // U+00C4: LATIN CAPITAL LETTER A WITH DIAERESIS
"Å", // U+00C5: LATIN CAPITAL LETTER A WITH RING ABOVE
"Ā", // U+0100: LATIN CAPITAL LETTER A WITH MACRON
"Ă", // U+0102: LATIN CAPITAL LETTER A WITH BREVE
"Ą", // U+0104: LATIN CAPITAL LETTER A WITH OGONEK
"Ə", // U+018F: LATIN CAPITAL LETTER SCHWA
"Ǎ", // U+01CD: LATIN CAPITAL LETTER A WITH CARON
"Ǟ", // U+01DE: LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON
"Ǡ", // U+01E0: LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON
"Ǻ", // U+01FA: LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE
"Ȁ", // U+0200: LATIN CAPITAL LETTER A WITH DOUBLE GRAVE
"Ȃ", // U+0202: LATIN CAPITAL LETTER A WITH INVERTED BREVE
"Ȧ", // U+0226: LATIN CAPITAL LETTER A WITH DOT ABOVE
"Ⱥ", // U+023A: LATIN CAPITAL LETTER A WITH STROKE
"ᴀ", // U+1D00: LATIN LETTER SMALL CAPITAL A
"Ḁ", // U+1E00: LATIN CAPITAL LETTER A WITH RING BELOW
"Ạ", // U+1EA0: LATIN CAPITAL LETTER A WITH DOT BELOW
"Ả", // U+1EA2: LATIN CAPITAL LETTER A WITH HOOK ABOVE
"Ấ", // U+1EA4: LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE
"Ầ", // U+1EA6: LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE
"Ẩ", // U+1EA8: LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
"Ẫ", // U+1EAA: LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE
"Ậ", // U+1EAC: LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW
"Ắ", // U+1EAE: LATIN CAPITAL LETTER A WITH BREVE AND ACUTE
"Ằ", // U+1EB0: LATIN CAPITAL LETTER A WITH BREVE AND GRAVE
"Ẳ", // U+1EB2: LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE
"Ẵ", // U+1EB4: LATIN CAPITAL LETTER A WITH BREVE AND TILDE
"Ặ", // U+1EB6: LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW
"Ⓐ", // U+24B6: CIRCLED LATIN CAPITAL LETTER A
"A", // U+FF21: FULLWIDTH LATIN CAPITAL LETTER A
],
"A",
),
(
&[
"à", // U+00E0: LATIN SMALL LETTER A WITH GRAVE
"á", // U+00E1: LATIN SMALL LETTER A WITH ACUTE
"â", // U+00E2: LATIN SMALL LETTER A WITH CIRCUMFLEX
"ã", // U+00E3: LATIN SMALL LETTER A WITH TILDE
"ä", // U+00E4: LATIN SMALL LETTER A WITH DIAERESIS
"å", // U+00E5: LATIN SMALL LETTER A WITH RING ABOVE
"ā", // U+0101: LATIN SMALL LETTER A WITH MACRON
"ă", // U+0103: LATIN SMALL LETTER A WITH BREVE
"ą", // U+0105: LATIN SMALL LETTER A WITH OGONEK
"ǎ", // U+01CE: LATIN SMALL LETTER A WITH CARON
"ǟ", // U+01DF: LATIN SMALL LETTER A WITH DIAERESIS AND MACRON
"ǡ", // U+01E1: LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON
"ǻ", // U+01FB: LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE
"ȁ", // U+0201: LATIN SMALL LETTER A WITH DOUBLE GRAVE
"ȃ", // U+0203: LATIN SMALL LETTER A WITH INVERTED BREVE
"ȧ", // U+0227: LATIN SMALL LETTER A WITH DOT ABOVE
"ɐ", // U+0250: LATIN SMALL LETTER TURNED A
"ə", // U+0259: LATIN SMALL LETTER SCHWA
"ɚ", // U+025A: LATIN SMALL LETTER SCHWA WITH HOOK
"ᶏ", // U+1D8F: LATIN SMALL LETTER A WITH RETROFLEX HOOK
"ḁ", // U+1E01: LATIN SMALL LETTER A WITH RING BELOW
"ᶕ", // U+1D95: LATIN SMALL LETTER SCHWA WITH RETROFLEX HOOK
"ẚ", // U+1E9A: LATIN SMALL LETTER A WITH RIGHT HALF RING
"ạ", // U+1EA1: LATIN SMALL LETTER A WITH DOT BELOW
"ả", // U+1EA3: LATIN SMALL LETTER A WITH HOOK ABOVE
"ấ", // U+1EA5: LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE
"ầ", // U+1EA7: LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE
"ẩ", // U+1EA9: LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
"ẫ", // U+1EAB: LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE
"ậ", // U+1EAD: LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW
"ắ", // U+1EAF: LATIN SMALL LETTER A WITH BREVE AND ACUTE
"ằ", // U+1EB1: LATIN SMALL LETTER A WITH BREVE AND GRAVE
"ẳ", // U+1EB3: LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE
"ẵ", // U+1EB5: LATIN SMALL LETTER A WITH BREVE AND TILDE
"ặ", // U+1EB7: LATIN SMALL LETTER A WITH BREVE AND DOT BELOW
"ₐ", // U+2090: LATIN SUBSCRIPT SMALL LETTER A
"ₔ", // U+2094: LATIN SUBSCRIPT SMALL LETTER SCHWA
"ⓐ", // U+24D0: CIRCLED LATIN SMALL LETTER A
"ⱥ", // U+2C65: LATIN SMALL LETTER A WITH STROKE
"Ɐ", // U+2C6F: LATIN CAPITAL LETTER TURNED A
"a", // U+FF41: FULLWIDTH LATIN SMALL LETTER A
],
"a",
),
(
&[
"Ꜳ", // U+A732: LATIN CAPITAL LETTER AA
],
"AA",
),
(
&[
"Æ", // U+00C6: LATIN CAPITAL LETTER AE
"Ǣ", // U+01E2: LATIN CAPITAL LETTER AE WITH MACRON
"Ǽ", // U+01FC: LATIN CAPITAL LETTER AE WITH ACUTE
"ᴁ", // U+1D01: LATIN LETTER SMALL CAPITAL AE
],
"AE",
),
(
&[
"Ꜵ", // U+A734: LATIN CAPITAL LETTER AO
],
"AO",
),
(
&[
"Ꜷ", // U+A736: LATIN CAPITAL LETTER AU
],
"AU",
),
(
&[
"Ꜹ", // U+A738: LATIN CAPITAL LETTER AV
"Ꜻ", // U+A73A: LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR
],
"AV",
),
(
&[
"Ꜽ", // U+A73C: LATIN CAPITAL LETTER AY
],
"AY",
),
(
&[
"⒜", // U+249C: PARENTHESIZED LATIN SMALL LETTER A
],
"(a)",
),
(
&[
"ꜳ", // U+A733: LATIN SMALL LETTER AA
],
"aa",
),
(
&[
"æ", // U+00E6: LATIN SMALL LETTER AE
"ǣ", // U+01E3: LATIN SMALL LETTER AE WITH MACRON
"ǽ", // U+01FD: LATIN SMALL LETTER AE WITH ACUTE
"ᴂ", // U+1D02: LATIN SMALL LETTER TURNED AE
],
"ae",
),
(
&[
"ꜵ", // U+A735: LATIN SMALL LETTER AO
],
"ao",
),
(
&[
"ꜷ", // U+A737: LATIN SMALL LETTER AU
],
"au",
),
(
&[
"ꜹ", // U+A739: LATIN SMALL LETTER AV
"ꜻ", // U+A73B: LATIN SMALL LETTER AV WITH HORIZONTAL BAR
],
"av",
),
(
&[
"ꜽ", // U+A73D: LATIN SMALL LETTER AY
],
"ay",
),
(
&[
"Ɓ", // U+0181: LATIN CAPITAL LETTER B WITH HOOK
"Ƃ", // U+0182: LATIN CAPITAL LETTER B WITH TOPBAR
"Ƀ", // U+0243: LATIN CAPITAL LETTER B WITH STROKE
"ʙ", // U+0299: LATIN LETTER SMALL CAPITAL B
"ᴃ", // U+1D03: LATIN LETTER SMALL CAPITAL BARRED B
"Ḃ", // U+1E02: LATIN CAPITAL LETTER B WITH DOT ABOVE
"Ḅ", // U+1E04: LATIN CAPITAL LETTER B WITH DOT BELOW
"Ḇ", // U+1E06: LATIN CAPITAL LETTER B WITH LINE BELOW
"Ⓑ", // U+24B7: CIRCLED LATIN CAPITAL LETTER B
"B", // U+FF22: FULLWIDTH LATIN CAPITAL LETTER B
],
"B",
),
(
&[
"ƀ", // U+0180: LATIN SMALL LETTER B WITH STROKE
"ƃ", // U+0183: LATIN SMALL LETTER B WITH TOPBAR
"ɓ", // U+0253: LATIN SMALL LETTER B WITH HOOK
"ᵬ", // U+1D6C: LATIN SMALL LETTER B WITH MIDDLE TILDE
"ᶀ", // U+1D80: LATIN SMALL LETTER B WITH PALATAL HOOK
"ḃ", // U+1E03: LATIN SMALL LETTER B WITH DOT ABOVE
"ḅ", // U+1E05: LATIN SMALL LETTER B WITH DOT BELOW
"ḇ", // U+1E07: LATIN SMALL LETTER B WITH LINE BELOW
"ⓑ", // U+24D1: CIRCLED LATIN SMALL LETTER B
"b", // U+FF42: FULLWIDTH LATIN SMALL LETTER B
],
"b",
),
(
&[
"⒝", // U+249D: PARENTHESIZED LATIN SMALL LETTER B
],
"(b)",
),
(
&[
"Ç", // U+00C7: LATIN CAPITAL LETTER C WITH CEDILLA
"Ć", // U+0106: LATIN CAPITAL LETTER C WITH ACUTE
"Ĉ", // U+0108: LATIN CAPITAL LETTER C WITH CIRCUMFLEX
"Ċ", // U+010A: LATIN CAPITAL LETTER C WITH DOT ABOVE
"Č", // U+010C: LATIN CAPITAL LETTER C WITH CARON
"Ƈ", // U+0187: LATIN CAPITAL LETTER C WITH HOOK
"Ȼ", // U+023B: LATIN CAPITAL LETTER C WITH STROKE
"ʗ", // U+0297: LATIN LETTER STRETCHED C
"ᴄ", // U+1D04: LATIN LETTER SMALL CAPITAL C
"Ḉ", // U+1E08: LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE
"Ⓒ", // U+24B8: CIRCLED LATIN CAPITAL LETTER C
"C", // U+FF23: FULLWIDTH LATIN CAPITAL LETTER C
],
"C",
),
(
&[
"ç", // U+00E7: LATIN SMALL LETTER C WITH CEDILLA
"ć", // U+0107: LATIN SMALL LETTER C WITH ACUTE
"ĉ", // U+0109: LATIN SMALL LETTER C WITH CIRCUMFLEX
"ċ", // U+010B: LATIN SMALL LETTER C WITH DOT ABOVE
"č", // U+010D: LATIN SMALL LETTER C WITH CARON
"ƈ", // U+0188: LATIN SMALL LETTER C WITH HOOK
"ȼ", // U+023C: LATIN SMALL LETTER C WITH STROKE
"ɕ", // U+0255: LATIN SMALL LETTER C WITH CURL
"ḉ", // U+1E09: LATIN SMALL LETTER C WITH CEDILLA AND ACUTE
"ↄ", // U+2184: LATIN SMALL LETTER REVERSED C
"ⓒ", // U+24D2: CIRCLED LATIN SMALL LETTER C
"Ꜿ", // U+A73E: LATIN CAPITAL LETTER REVERSED C WITH DOT
"ꜿ", // U+A73F: LATIN SMALL LETTER REVERSED C WITH DOT
"c", // U+FF43: FULLWIDTH LATIN SMALL LETTER C
],
"c",
),
(
&[
"⒞", // U+249E: PARENTHESIZED LATIN SMALL LETTER C
],
"(c)",
),
(
&[
"Ð", // U+00D0: LATIN CAPITAL LETTER ETH
"Ď", // U+010E: LATIN CAPITAL LETTER D WITH CARON
"Đ", // U+0110: LATIN CAPITAL LETTER D WITH STROKE
"Ɖ", // U+0189: LATIN CAPITAL LETTER AFRICAN D
"Ɗ", // U+018A: LATIN CAPITAL LETTER D WITH HOOK
"Ƌ", // U+018B: LATIN CAPITAL LETTER D WITH TOPBAR
"ᴅ", // U+1D05: LATIN LETTER SMALL CAPITAL D
"ᴆ", // U+1D06: LATIN LETTER SMALL CAPITAL ETH
"Ḋ", // U+1E0A: LATIN CAPITAL LETTER D WITH DOT ABOVE
"Ḍ", // U+1E0C: LATIN CAPITAL LETTER D WITH DOT BELOW
"Ḏ", // U+1E0E: LATIN CAPITAL LETTER D WITH LINE BELOW
"Ḑ", // U+1E10: LATIN CAPITAL LETTER D WITH CEDILLA
"Ḓ", // U+1E12: LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW
"Ⓓ", // U+24B9: CIRCLED LATIN CAPITAL LETTER D
"Ꝺ", // U+A779: LATIN CAPITAL LETTER INSULAR D
"D", // U+FF24: FULLWIDTH LATIN CAPITAL LETTER D
],
"D",
),
(
&[
"ð", // U+00F0: LATIN SMALL LETTER ETH
"ď", // U+010F: LATIN SMALL LETTER D WITH CARON
"đ", // U+0111: LATIN SMALL LETTER D WITH STROKE
"ƌ", // U+018C: LATIN SMALL LETTER D WITH TOPBAR
"ȡ", // U+0221: LATIN SMALL LETTER D WITH CURL
"ɖ", // U+0256: LATIN SMALL LETTER D WITH TAIL
"ɗ", // U+0257: LATIN SMALL LETTER D WITH HOOK
"ᵭ", // U+1D6D: LATIN SMALL LETTER D WITH MIDDLE TILDE
"ᶁ", // U+1D81: LATIN SMALL LETTER D WITH PALATAL HOOK
"ᶑ", // U+1D91: LATIN SMALL LETTER D WITH HOOK AND TAIL
"ḋ", // U+1E0B: LATIN SMALL LETTER D WITH DOT ABOVE
"ḍ", // U+1E0D: LATIN SMALL LETTER D WITH DOT BELOW
"ḏ", // U+1E0F: LATIN SMALL LETTER D WITH LINE BELOW
"ḑ", // U+1E11: LATIN SMALL LETTER D WITH CEDILLA
"ḓ", // U+1E13: LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW
"ⓓ", // U+24D3: CIRCLED LATIN SMALL LETTER D
"ꝺ", // U+A77A: LATIN SMALL LETTER INSULAR D
"d", // U+FF44: FULLWIDTH LATIN SMALL LETTER D
],
"d",
),
(
&[
"DŽ", // U+01C4: LATIN CAPITAL LETTER DZ WITH CARON
"DZ", // U+01F1: LATIN CAPITAL LETTER DZ
],
"DZ",
),
(
&[
"Dž", // U+01C5: LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
"Dz", // U+01F2: LATIN CAPITAL LETTER D WITH SMALL LETTER Z
],
"Dz",
),
(
&[
"⒟", // U+249F: PARENTHESIZED LATIN SMALL LETTER D
],
"(d)",
),
(
&[
"ȸ", // U+0238: LATIN SMALL LETTER DB DIGRAPH
],
"db",
),
(
&[
"dž", // U+01C6: LATIN SMALL LETTER DZ WITH CARON
"dz", // U+01F3: LATIN SMALL LETTER DZ
"ʣ", // U+02A3: LATIN SMALL LETTER DZ DIGRAPH
"ʥ", // U+02A5: LATIN SMALL LETTER DZ DIGRAPH WITH CURL
],
"dz",
),
(
&[
"È", // U+00C8: LATIN CAPITAL LETTER E WITH GRAVE
"É", // U+00C9: LATIN CAPITAL LETTER E WITH ACUTE
"Ê", // U+00CA: LATIN CAPITAL LETTER E WITH CIRCUMFLEX
"Ë", // U+00CB: LATIN CAPITAL LETTER E WITH DIAERESIS
"Ē", // U+0112: LATIN CAPITAL LETTER E WITH MACRON
"Ĕ", // U+0114: LATIN CAPITAL LETTER E WITH BREVE
"Ė", // U+0116: LATIN CAPITAL LETTER E WITH DOT ABOVE
"Ę", // U+0118: LATIN CAPITAL LETTER E WITH OGONEK
"Ě", // U+011A: LATIN CAPITAL LETTER E WITH CARON
"Ǝ", // U+018E: LATIN CAPITAL LETTER REVERSED E
"Ɛ", // U+0190: LATIN CAPITAL LETTER OPEN E
"Ȅ", // U+0204: LATIN CAPITAL LETTER E WITH DOUBLE GRAVE
"Ȇ", // U+0206: LATIN CAPITAL LETTER E WITH INVERTED BREVE
"Ȩ", // U+0228: LATIN CAPITAL LETTER E WITH CEDILLA
"Ɇ", // U+0246: LATIN CAPITAL LETTER E WITH STROKE
"ᴇ", // U+1D07: LATIN LETTER SMALL CAPITAL E
"Ḕ", // U+1E14: LATIN CAPITAL LETTER E WITH MACRON AND GRAVE
"Ḗ", // U+1E16: LATIN CAPITAL LETTER E WITH MACRON AND ACUTE
"Ḙ", // U+1E18: LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW
"Ḛ", // U+1E1A: LATIN CAPITAL LETTER E WITH TILDE BELOW
"Ḝ", // U+1E1C: LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE
"Ẹ", // U+1EB8: LATIN CAPITAL LETTER E WITH DOT BELOW
"Ẻ", // U+1EBA: LATIN CAPITAL LETTER E WITH HOOK ABOVE
"Ẽ", // U+1EBC: LATIN CAPITAL LETTER E WITH TILDE
"Ế", // U+1EBE: LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE
"Ề", // U+1EC0: LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE
"Ể", // U+1EC2: LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
"Ễ", // U+1EC4: LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE
"Ệ", // U+1EC6: LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW
"Ⓔ", // U+24BA: CIRCLED LATIN CAPITAL LETTER E
"ⱻ", // U+2C7B: LATIN LETTER SMALL CAPITAL TURNED E
"E", // U+FF25: FULLWIDTH LATIN CAPITAL LETTER E
],
"E",
),
(
&[
"è", // U+00E8: LATIN SMALL LETTER E WITH GRAVE
"é", // U+00E9: LATIN SMALL LETTER E WITH ACUTE
"ê", // U+00EA: LATIN SMALL LETTER E WITH CIRCUMFLEX
"ë", // U+00EB: LATIN SMALL LETTER E WITH DIAERESIS
"ē", // U+0113: LATIN SMALL LETTER E WITH MACRON
"ĕ", // U+0115: LATIN SMALL LETTER E WITH BREVE
"ė", // U+0117: LATIN SMALL LETTER E WITH DOT ABOVE
"ę", // U+0119: LATIN SMALL LETTER E WITH OGONEK
"ě", // U+011B: LATIN SMALL LETTER E WITH CARON
"ǝ", // U+01DD: LATIN SMALL LETTER TURNED E
"ȅ", // U+0205: LATIN SMALL LETTER E WITH DOUBLE GRAVE
"ȇ", // U+0207: LATIN SMALL LETTER E WITH INVERTED BREVE
"ȩ", // U+0229: LATIN SMALL LETTER E WITH CEDILLA
"ɇ", // U+0247: LATIN SMALL LETTER E WITH STROKE
"ɘ", // U+0258: LATIN SMALL LETTER REVERSED E
"ɛ", // U+025B: LATIN SMALL LETTER OPEN E
"ɜ", // U+025C: LATIN SMALL LETTER REVERSED OPEN E
"ɝ", // U+025D: LATIN SMALL LETTER REVERSED OPEN E WITH HOOK
"ɞ", // U+025E: LATIN SMALL LETTER CLOSED REVERSED OPEN E
"ʚ", // U+029A: LATIN SMALL LETTER CLOSED OPEN E
"ᴈ", // U+1D08: LATIN SMALL LETTER TURNED OPEN E
"ᶒ", // U+1D92: LATIN SMALL LETTER E WITH RETROFLEX HOOK
"ᶓ", // U+1D93: LATIN SMALL LETTER OPEN E WITH RETROFLEX HOOK
"ᶔ", // U+1D94: LATIN SMALL LETTER REVERSED OPEN E WITH RETROFLEX HOOK
"ḕ", // U+1E15: LATIN SMALL LETTER E WITH MACRON AND GRAVE
"ḗ", // U+1E17: LATIN SMALL LETTER E WITH MACRON AND ACUTE
"ḙ", // U+1E19: LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW
"ḛ", // U+1E1B: LATIN SMALL LETTER E WITH TILDE BELOW
"ḝ", // U+1E1D: LATIN SMALL LETTER E WITH CEDILLA AND BREVE
"ẹ", // U+1EB9: LATIN SMALL LETTER E WITH DOT BELOW
"ẻ", // U+1EBB: LATIN SMALL LETTER E WITH HOOK ABOVE
"ẽ", // U+1EBD: LATIN SMALL LETTER E WITH TILDE
"ế", // U+1EBF: LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE
"ề", // U+1EC1: LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE
"ể", // U+1EC3: LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
"ễ", // U+1EC5: LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE
"ệ", // U+1EC7: LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW
"ₑ", // U+2091: LATIN SUBSCRIPT SMALL LETTER E
"ⓔ", // U+24D4: CIRCLED LATIN SMALL LETTER E
"ⱸ", // U+2C78: LATIN SMALL LETTER E WITH NOTCH
"e", // U+FF45: FULLWIDTH LATIN SMALL LETTER E
],
"e",
),
(
&[
"⒠", // U+24A0: PARENTHESIZED LATIN SMALL LETTER E
],
"(e)",
),
(
&[
"Ƒ", // U+0191: LATIN CAPITAL LETTER F WITH HOOK
"Ḟ", // U+1E1E: LATIN CAPITAL LETTER F WITH DOT ABOVE
"Ⓕ", // U+24BB: CIRCLED LATIN CAPITAL LETTER F
"ꜰ", // U+A730: LATIN LETTER SMALL CAPITAL F
"Ꝼ", // U+A77B: LATIN CAPITAL LETTER INSULAR F
"ꟻ", // U+A7FB: LATIN EPIGRAPHIC LETTER REVERSED F
"F", // U+FF26: FULLWIDTH LATIN CAPITAL LETTER F
],
"F",
),
(
&[
"ƒ", // U+0192: LATIN SMALL LETTER F WITH HOOK
"ᵮ", // U+1D6E: LATIN SMALL LETTER F WITH MIDDLE TILDE
"ᶂ", // U+1D82: LATIN SMALL LETTER F WITH PALATAL HOOK
"ḟ", // U+1E1F: LATIN SMALL LETTER F WITH DOT ABOVE
"ẛ", // U+1E9B: LATIN SMALL LETTER LONG S WITH DOT ABOVE
"ⓕ", // U+24D5: CIRCLED LATIN SMALL LETTER F
"ꝼ", // U+A77C: LATIN SMALL LETTER INSULAR F
"f", // U+FF46: FULLWIDTH LATIN SMALL LETTER F
],
"f",
),
(
&[
"⒡", // U+24A1: PARENTHESIZED LATIN SMALL LETTER F
],
"(f)",
),
(
&[
"ff", // U+FB00: LATIN SMALL LIGATURE FF
],
"ff",
),
(
&[
"ffi", // U+FB03: LATIN SMALL LIGATURE FFI
],
"ffi",
),
(
&[
"ffl", // U+FB04: LATIN SMALL LIGATURE FFL
],
"ffl",
),
(
&[
"fi", // U+FB01: LATIN SMALL LIGATURE FI
],
"fi",
),
(
&[
"fl", // U+FB02: LATIN SMALL LIGATURE FL
],
"fl",
),
(
&[
"Ĝ", // U+011C: LATIN CAPITAL LETTER G WITH CIRCUMFLEX
"Ğ", // U+011E: LATIN CAPITAL LETTER G WITH BREVE
"Ġ", // U+0120: LATIN CAPITAL LETTER G WITH DOT ABOVE
"Ģ", // U+0122: LATIN CAPITAL LETTER G WITH CEDILLA
"Ɠ", // U+0193: LATIN CAPITAL LETTER G WITH HOOK
"Ǥ", // U+01E4: LATIN CAPITAL LETTER G WITH STROKE
"ǥ", // U+01E5: LATIN SMALL LETTER G WITH STROKE
"Ǧ", // U+01E6: LATIN CAPITAL LETTER G WITH CARON
"ǧ", // U+01E7: LATIN SMALL LETTER G WITH CARON
"Ǵ", // U+01F4: LATIN CAPITAL LETTER G WITH ACUTE
"ɢ", // U+0262: LATIN LETTER SMALL CAPITAL G
"ʛ", // U+029B: LATIN LETTER SMALL CAPITAL G WITH HOOK
"Ḡ", // U+1E20: LATIN CAPITAL LETTER G WITH MACRON
"Ⓖ", // U+24BC: CIRCLED LATIN CAPITAL LETTER G
"Ᵹ", // U+A77D: LATIN CAPITAL LETTER INSULAR G
"Ꝿ", // U+A77E: LATIN CAPITAL LETTER TURNED INSULAR G
"G", // U+FF27: FULLWIDTH LATIN CAPITAL LETTER G
],
"G",
),
(
&[
"ĝ", // U+011D: LATIN SMALL LETTER G WITH CIRCUMFLEX
"ğ", // U+011F: LATIN SMALL LETTER G WITH BREVE
"ġ", // U+0121: LATIN SMALL LETTER G WITH DOT ABOVE
"ģ", // U+0123: LATIN SMALL LETTER G WITH CEDILLA
"ǵ", // U+01F5: LATIN SMALL LETTER G WITH ACUTE
"ɠ", // U+0260: LATIN SMALL LETTER G WITH HOOK
"ɡ", // U+0261: LATIN SMALL LETTER SCRIPT G
"ᵷ", // U+1D77: LATIN SMALL LETTER TURNED G
"ᵹ", // U+1D79: LATIN SMALL LETTER INSULAR G
"ᶃ", // U+1D83: LATIN SMALL LETTER G WITH PALATAL HOOK
"ḡ", // U+1E21: LATIN SMALL LETTER G WITH MACRON
"ⓖ", // U+24D6: CIRCLED LATIN SMALL LETTER G
"ꝿ", // U+A77F: LATIN SMALL LETTER TURNED INSULAR G
"g", // U+FF47: FULLWIDTH LATIN SMALL LETTER G
],
"g",
),
(
&[
"⒢", // U+24A2: PARENTHESIZED LATIN SMALL LETTER G
],
"(g)",
),
(
&[
"Ĥ", // U+0124: LATIN CAPITAL LETTER H WITH CIRCUMFLEX
"Ħ", // U+0126: LATIN CAPITAL LETTER H WITH STROKE
"Ȟ", // U+021E: LATIN CAPITAL LETTER H WITH CARON
"ʜ", // U+029C: LATIN LETTER SMALL CAPITAL H
"Ḣ", // U+1E22: LATIN CAPITAL LETTER H WITH DOT ABOVE
"Ḥ", // U+1E24: LATIN CAPITAL LETTER H WITH DOT BELOW
"Ḧ", // U+1E26: LATIN CAPITAL LETTER H WITH DIAERESIS
"Ḩ", // U+1E28: LATIN CAPITAL LETTER H WITH CEDILLA
"Ḫ", // U+1E2A: LATIN CAPITAL LETTER H WITH BREVE BELOW
"Ⓗ", // U+24BD: CIRCLED LATIN CAPITAL LETTER H
"Ⱨ", // U+2C67: LATIN CAPITAL LETTER H WITH DESCENDER
"Ⱶ", // U+2C75: LATIN CAPITAL LETTER HALF H
"H", // U+FF28: FULLWIDTH LATIN CAPITAL LETTER H
],
"H",
),
(
&[
"ĥ", // U+0125: LATIN SMALL LETTER H WITH CIRCUMFLEX
"ħ", // U+0127: LATIN SMALL LETTER H WITH STROKE
"ȟ", // U+021F: LATIN SMALL LETTER H WITH CARON
"ɥ", // U+0265: LATIN SMALL LETTER TURNED H
"ɦ", // U+0266: LATIN SMALL LETTER H WITH HOOK
"ʮ", // U+02AE: LATIN SMALL LETTER TURNED H WITH FISHHOOK
"ʯ", // U+02AF: LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL
"ḣ", // U+1E23: LATIN SMALL LETTER H WITH DOT ABOVE
"ḥ", // U+1E25: LATIN SMALL LETTER H WITH DOT BELOW
"ḧ", // U+1E27: LATIN SMALL LETTER H WITH DIAERESIS
"ḩ", // U+1E29: LATIN SMALL LETTER H WITH CEDILLA
"ḫ", // U+1E2B: LATIN SMALL LETTER H WITH BREVE BELOW
"ẖ", // U+1E96: LATIN SMALL LETTER H WITH LINE BELOW
"ⓗ", // U+24D7: CIRCLED LATIN SMALL LETTER H
"ⱨ", // U+2C68: LATIN SMALL LETTER H WITH DESCENDER
"ⱶ", // U+2C76: LATIN SMALL LETTER HALF H
"h", // U+FF48: FULLWIDTH LATIN SMALL LETTER H
],
"h",
),
(
&[
"Ƕ", // U+01F6: LATIN CAPITAL LETTER HWAIR
],
"HV",
),
(
&[
"⒣", // U+24A3: PARENTHESIZED LATIN SMALL LETTER H
],
"(h)",
),
(
&[
"ƕ", // U+0195: LATIN SMALL LETTER HV
],
"hv",
),
(
&[
"Ì", // U+00CC: LATIN CAPITAL LETTER I WITH GRAVE
"Í", // U+00CD: LATIN CAPITAL LETTER I WITH ACUTE
"Î", // U+00CE: LATIN CAPITAL LETTER I WITH CIRCUMFLEX
"Ï", // U+00CF: LATIN CAPITAL LETTER I WITH DIAERESIS
"Ĩ", // U+0128: LATIN CAPITAL LETTER I WITH TILDE
"Ī", // U+012A: LATIN CAPITAL LETTER I WITH MACRON
"Ĭ", // U+012C: LATIN CAPITAL LETTER I WITH BREVE
"Į", // U+012E: LATIN CAPITAL LETTER I WITH OGONEK
"İ", // U+0130: LATIN CAPITAL LETTER I WITH DOT ABOVE
"Ɩ", // U+0196: LATIN CAPITAL LETTER IOTA
"Ɨ", // U+0197: LATIN CAPITAL LETTER I WITH STROKE
"Ǐ", // U+01CF: LATIN CAPITAL LETTER I WITH CARON
"Ȉ", // U+0208: LATIN CAPITAL LETTER I WITH DOUBLE GRAVE
"Ȋ", // U+020A: LATIN CAPITAL LETTER I WITH INVERTED BREVE
"ɪ", // U+026A: LATIN LETTER SMALL CAPITAL I
"ᵻ", // U+1D7B: LATIN SMALL CAPITAL LETTER I WITH STROKE
"Ḭ", // U+1E2C: LATIN CAPITAL LETTER I WITH TILDE BELOW
"Ḯ", // U+1E2E: LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE
"Ỉ", // U+1EC8: LATIN CAPITAL LETTER I WITH HOOK ABOVE
"Ị", // U+1ECA: LATIN CAPITAL LETTER I WITH DOT BELOW
"Ⓘ", // U+24BE: CIRCLED LATIN CAPITAL LETTER I
"ꟾ", // U+A7FE: LATIN EPIGRAPHIC LETTER I LONGA
"I", // U+FF29: FULLWIDTH LATIN CAPITAL LETTER I
],
"I",
),
(
&[
"ì", // U+00EC: LATIN SMALL LETTER I WITH GRAVE
"í", // U+00ED: LATIN SMALL LETTER I WITH ACUTE
"î", // U+00EE: LATIN SMALL LETTER I WITH CIRCUMFLEX
"ï", // U+00EF: LATIN SMALL LETTER I WITH DIAERESIS
"ĩ", // U+0129: LATIN SMALL LETTER I WITH TILDE
"ī", // U+012B: LATIN SMALL LETTER I WITH MACRON
"ĭ", // U+012D: LATIN SMALL LETTER I WITH BREVE
"į", // U+012F: LATIN SMALL LETTER I WITH OGONEK
"ı", // U+0131: LATIN SMALL LETTER DOTLESS I
"ǐ", // U+01D0: LATIN SMALL LETTER I WITH CARON
"ȉ", // U+0209: LATIN SMALL LETTER I WITH DOUBLE GRAVE
"ȋ", // U+020B: LATIN SMALL LETTER I WITH INVERTED BREVE
"ɨ", // U+0268: LATIN SMALL LETTER I WITH STROKE
"ᴉ", // U+1D09: LATIN SMALL LETTER TURNED I
"ᵢ", // U+1D62: LATIN SUBSCRIPT SMALL LETTER I
"ᵼ", // U+1D7C: LATIN SMALL LETTER IOTA WITH STROKE
"ᶖ", // U+1D96: LATIN SMALL LETTER I WITH RETROFLEX HOOK
"ḭ", // U+1E2D: LATIN SMALL LETTER I WITH TILDE BELOW
"ḯ", // U+1E2F: LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE
"ỉ", // U+1EC9: LATIN SMALL LETTER I WITH HOOK ABOVE
"ị", // U+1ECB: LATIN SMALL LETTER I WITH DOT BELOW
"ⁱ", // U+2071: SUPERSCRIPT LATIN SMALL LETTER I
"ⓘ", // U+24D8: CIRCLED LATIN SMALL LETTER I
"i", // U+FF49: FULLWIDTH LATIN SMALL LETTER I
],
"i",
),
(
&[
"IJ", // U+0132: LATIN CAPITAL LIGATURE IJ
],
"IJ",
),
(
&[
"⒤", // U+24A4: PARENTHESIZED LATIN SMALL LETTER I
],
"(i)",
),
(
&[
"ij", // U+0133: LATIN SMALL LIGATURE IJ
],
"ij",
),
(
&[
"Ĵ", // U+0134: LATIN CAPITAL LETTER J WITH CIRCUMFLEX
"Ɉ", // U+0248: LATIN CAPITAL LETTER J WITH STROKE
"ᴊ", // U+1D0A: LATIN LETTER SMALL CAPITAL J
"Ⓙ", // U+24BF: CIRCLED LATIN CAPITAL LETTER J
"J", // U+FF2A: FULLWIDTH LATIN CAPITAL LETTER J
],
"J",
),
(
&[
"ĵ", // U+0135: LATIN SMALL LETTER J WITH CIRCUMFLEX
"ǰ", // U+01F0: LATIN SMALL LETTER J WITH CARON
"ȷ", // U+0237: LATIN SMALL LETTER DOTLESS J
"ɉ", // U+0249: LATIN SMALL LETTER J WITH STROKE
"ɟ", // U+025F: LATIN SMALL LETTER DOTLESS J WITH STROKE
"ʄ", // U+0284: LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK
"ʝ", // U+029D: LATIN SMALL LETTER J WITH CROSSED-TAIL
"ⓙ", // U+24D9: CIRCLED LATIN SMALL LETTER J
"ⱼ", // U+2C7C: LATIN SUBSCRIPT SMALL LETTER J
"j", // U+FF4A: FULLWIDTH LATIN SMALL LETTER J
],
"j",
),
(
&[
"⒥", // U+24A5: PARENTHESIZED LATIN SMALL LETTER J
],
"(j)",
),
(
&[
"Ķ", // U+0136: LATIN CAPITAL LETTER K WITH CEDILLA
"Ƙ", // U+0198: LATIN CAPITAL LETTER K WITH HOOK
"Ǩ", // U+01E8: LATIN CAPITAL LETTER K WITH CARON
"ᴋ", // U+1D0B: LATIN LETTER SMALL CAPITAL K
"Ḱ", // U+1E30: LATIN CAPITAL LETTER K WITH ACUTE
"Ḳ", // U+1E32: LATIN CAPITAL LETTER K WITH DOT BELOW
"Ḵ", // U+1E34: LATIN CAPITAL LETTER K WITH LINE BELOW
"Ⓚ", // U+24C0: CIRCLED LATIN CAPITAL LETTER K
"Ⱪ", // U+2C69: LATIN CAPITAL LETTER K WITH DESCENDER
"Ꝁ", // U+A740: LATIN CAPITAL LETTER K WITH STROKE
"Ꝃ", // U+A742: LATIN CAPITAL LETTER K WITH DIAGONAL STROKE
"Ꝅ", // U+A744: LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE
"K", // U+FF2B: FULLWIDTH LATIN CAPITAL LETTER K
],
"K",
),
(
&[
"ķ", // U+0137: LATIN SMALL LETTER K WITH CEDILLA
"ƙ", // U+0199: LATIN SMALL LETTER K WITH HOOK
"ǩ", // U+01E9: LATIN SMALL LETTER K WITH CARON
"ʞ", // U+029E: LATIN SMALL LETTER TURNED K
"ᶄ", // U+1D84: LATIN SMALL LETTER K WITH PALATAL HOOK
"ḱ", // U+1E31: LATIN SMALL LETTER K WITH ACUTE
"ḳ", // U+1E33: LATIN SMALL LETTER K WITH DOT BELOW
"ḵ", // U+1E35: LATIN SMALL LETTER K WITH LINE BELOW
"ⓚ", // U+24DA: CIRCLED LATIN SMALL LETTER K
"ⱪ", // U+2C6A: LATIN SMALL LETTER K WITH DESCENDER
"ꝁ", // U+A741: LATIN SMALL LETTER K WITH STROKE
"ꝃ", // U+A743: LATIN SMALL LETTER K WITH DIAGONAL STROKE
"ꝅ", // U+A745: LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE
"k", // U+FF4B: FULLWIDTH LATIN SMALL LETTER K
],
"k",
),
(
&[
"⒦", // U+24A6: PARENTHESIZED LATIN SMALL LETTER K
],
"(k)",
),
(
&[
"Ĺ", // U+0139: LATIN CAPITAL LETTER L WITH ACUTE
"Ļ", // U+013B: LATIN CAPITAL LETTER L WITH CEDILLA
"Ľ", // U+013D: LATIN CAPITAL LETTER L WITH CARON
"Ŀ", // U+013F: LATIN CAPITAL LETTER L WITH MIDDLE DOT
"Ł", // U+0141: LATIN CAPITAL LETTER L WITH STROKE
"Ƚ", // U+023D: LATIN CAPITAL LETTER L WITH BAR
"ʟ", // U+029F: LATIN LETTER SMALL CAPITAL L
"ᴌ", // U+1D0C: LATIN LETTER SMALL CAPITAL L WITH STROKE
"Ḷ", // U+1E36: LATIN CAPITAL LETTER L WITH DOT BELOW
"Ḹ", // U+1E38: LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON
"Ḻ", // U+1E3A: LATIN CAPITAL LETTER L WITH LINE BELOW
"Ḽ", // U+1E3C: LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW
"Ⓛ", // U+24C1: CIRCLED LATIN CAPITAL LETTER L
"Ⱡ", // U+2C60: LATIN CAPITAL LETTER L WITH DOUBLE BAR
"Ɫ", // U+2C62: LATIN CAPITAL LETTER L WITH MIDDLE TILDE
"Ꝇ", // U+A746: LATIN CAPITAL LETTER BROKEN L
"Ꝉ", // U+A748: LATIN CAPITAL LETTER L WITH HIGH STROKE
"Ꞁ", // U+A780: LATIN CAPITAL LETTER TURNED L
"L", // U+FF2C: FULLWIDTH LATIN CAPITAL LETTER L
],
"L",
),
(
&[
"ĺ", // U+013A: LATIN SMALL LETTER L WITH ACUTE
"ļ", // U+013C: LATIN SMALL LETTER L WITH CEDILLA
"ľ", // U+013E: LATIN SMALL LETTER L WITH CARON
"ŀ", // U+0140: LATIN SMALL LETTER L WITH MIDDLE DOT
"ł", // U+0142: LATIN SMALL LETTER L WITH STROKE
"ƚ", // U+019A: LATIN SMALL LETTER L WITH BAR
"ȴ", // U+0234: LATIN SMALL LETTER L WITH CURL
"ɫ", // U+026B: LATIN SMALL LETTER L WITH MIDDLE TILDE
"ɬ", // U+026C: LATIN SMALL LETTER L WITH BELT
"ɭ", // U+026D: LATIN SMALL LETTER L WITH RETROFLEX HOOK
"ᶅ", // U+1D85: LATIN SMALL LETTER L WITH PALATAL HOOK
"ḷ", // U+1E37: LATIN SMALL LETTER L WITH DOT BELOW
"ḹ", // U+1E39: LATIN SMALL LETTER L WITH DOT BELOW AND MACRON
"ḻ", // U+1E3B: LATIN SMALL LETTER L WITH LINE BELOW
"ḽ", // U+1E3D: LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW
"ⓛ", // U+24DB: CIRCLED LATIN SMALL LETTER L
"ⱡ", // U+2C61: LATIN SMALL LETTER L WITH DOUBLE BAR
"ꝇ", // U+A747: LATIN SMALL LETTER BROKEN L
"ꝉ", // U+A749: LATIN SMALL LETTER L WITH HIGH STROKE
"ꞁ", // U+A781: LATIN SMALL LETTER TURNED L
"l", // U+FF4C: FULLWIDTH LATIN SMALL LETTER L
],
"l",
),
(
&[
"LJ", // U+01C7: LATIN CAPITAL LETTER LJ
],
"LJ",
),
(
&[
"Ỻ", // U+1EFA: LATIN CAPITAL LETTER MIDDLE-WELSH LL
],
"LL",
),
(
&[
"Lj", // U+01C8: LATIN CAPITAL LETTER L WITH SMALL LETTER J
],
"Lj",
),
(
&[
"⒧", // U+24A7: PARENTHESIZED LATIN SMALL LETTER L
],
"(l)",
),
(
&[
"lj", // U+01C9: LATIN SMALL LETTER LJ
],
"lj",
),
(
&[
"ỻ", // U+1EFB: LATIN SMALL LETTER MIDDLE-WELSH LL
],
"ll",
),
(
&[
"ʪ", // U+02AA: LATIN SMALL LETTER LS DIGRAPH
],
"ls",
),
(
&[
"ʫ", // U+02AB: LATIN SMALL LETTER LZ DIGRAPH
],
"lz",
),
(
&[
"Ɯ", // U+019C: LATIN CAPITAL LETTER TURNED M
"ᴍ", // U+1D0D: LATIN LETTER SMALL CAPITAL M
"Ḿ", // U+1E3E: LATIN CAPITAL LETTER M WITH ACUTE
"Ṁ", // U+1E40: LATIN CAPITAL LETTER M WITH DOT ABOVE
"Ṃ", // U+1E42: LATIN CAPITAL LETTER M WITH DOT BELOW
"Ⓜ", // U+24C2: CIRCLED LATIN CAPITAL LETTER M
"Ɱ", // U+2C6E: LATIN CAPITAL LETTER M WITH HOOK
"ꟽ", // U+A7FD: LATIN EPIGRAPHIC LETTER INVERTED M
"ꟿ", // U+A7FF: LATIN EPIGRAPHIC LETTER ARCHAIC M
"M", // U+FF2D: FULLWIDTH LATIN CAPITAL LETTER M
],
"M",
),
(
&[
"ɯ", // U+026F: LATIN SMALL LETTER TURNED M
"ɰ", // U+0270: LATIN SMALL LETTER TURNED M WITH LONG LEG
"ɱ", // U+0271: LATIN SMALL LETTER M WITH HOOK
"ᵯ", // U+1D6F: LATIN SMALL LETTER M WITH MIDDLE TILDE
"ᶆ", // U+1D86: LATIN SMALL LETTER M WITH PALATAL HOOK
"ḿ", // U+1E3F: LATIN SMALL LETTER M WITH ACUTE
"ṁ", // U+1E41: LATIN SMALL LETTER M WITH DOT ABOVE
"ṃ", // U+1E43: LATIN SMALL LETTER M WITH DOT BELOW
"ⓜ", // U+24DC: CIRCLED LATIN SMALL LETTER M
"m", // U+FF4D: FULLWIDTH LATIN SMALL LETTER M
],
"m",
),
(
&[
"⒨", // U+24A8: PARENTHESIZED LATIN SMALL LETTER M
],
"(m)",
),
(
&[
"Ñ", // U+00D1: LATIN CAPITAL LETTER N WITH TILDE
"Ń", // U+0143: LATIN CAPITAL LETTER N WITH ACUTE
"Ņ", // U+0145: LATIN CAPITAL LETTER N WITH CEDILLA
"Ň", // U+0147: LATIN CAPITAL LETTER N WITH CARON
"Ŋ", // U+014A: LATIN CAPITAL LETTER ENG
"Ɲ", // U+019D: LATIN CAPITAL LETTER N WITH LEFT HOOK
"Ǹ", // U+01F8: LATIN CAPITAL LETTER N WITH GRAVE
"Ƞ", // U+0220: LATIN CAPITAL LETTER N WITH LONG RIGHT LEG
"ɴ", // U+0274: LATIN LETTER SMALL CAPITAL N
"ᴎ", // U+1D0E: LATIN LETTER SMALL CAPITAL REVERSED N
"Ṅ", // U+1E44: LATIN CAPITAL LETTER N WITH DOT ABOVE
"Ṇ", // U+1E46: LATIN CAPITAL LETTER N WITH DOT BELOW
"Ṉ", // U+1E48: LATIN CAPITAL LETTER N WITH LINE BELOW
"Ṋ", // U+1E4A: LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW
"Ⓝ", // U+24C3: CIRCLED LATIN CAPITAL LETTER N
"N", // U+FF2E: FULLWIDTH LATIN CAPITAL LETTER N
],
"N",
),
(
&[
"ñ", // U+00F1: LATIN SMALL LETTER N WITH TILDE
"ń", // U+0144: LATIN SMALL LETTER N WITH ACUTE
"ņ", // U+0146: LATIN SMALL LETTER N WITH CEDILLA
"ň", // U+0148: LATIN SMALL LETTER N WITH CARON
"ʼn", // U+0149: LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
"ŋ", // U+014B: LATIN SMALL LETTER ENG
"ƞ", // U+019E: LATIN SMALL LETTER N WITH LONG RIGHT LEG
"ǹ", // U+01F9: LATIN SMALL LETTER N WITH GRAVE
"ȵ", // U+0235: LATIN SMALL LETTER N WITH CURL
"ɲ", // U+0272: LATIN SMALL LETTER N WITH LEFT HOOK
"ɳ", // U+0273: LATIN SMALL LETTER N WITH RETROFLEX HOOK
"ᵰ", // U+1D70: LATIN SMALL LETTER N WITH MIDDLE TILDE
"ᶇ", // U+1D87: LATIN SMALL LETTER N WITH PALATAL HOOK
"ṅ", // U+1E45: LATIN SMALL LETTER N WITH DOT ABOVE
"ṇ", // U+1E47: LATIN SMALL LETTER N WITH DOT BELOW
"ṉ", // U+1E49: LATIN SMALL LETTER N WITH LINE BELOW
"ṋ", // U+1E4B: LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW
"ⁿ", // U+207F: SUPERSCRIPT LATIN SMALL LETTER N
"ⓝ", // U+24DD: CIRCLED LATIN SMALL LETTER N
"n", // U+FF4E: FULLWIDTH LATIN SMALL LETTER N
],
"n",
),
(
&[
"NJ", // U+01CA: LATIN CAPITAL LETTER NJ
],
"NJ",
),
(
&[
"Nj", // U+01CB: LATIN CAPITAL LETTER N WITH SMALL LETTER J
],
"Nj",
),
(
&[
"⒩", // U+24A9: PARENTHESIZED LATIN SMALL LETTER N
],
"(n)",
),
(
&[
"nj", // U+01CC: LATIN SMALL LETTER NJ
],
"nj",
),
(
&[
"Ò", // U+00D2: LATIN CAPITAL LETTER O WITH GRAVE
"Ó", // U+00D3: LATIN CAPITAL LETTER O WITH ACUTE
"Ô", // U+00D4: LATIN CAPITAL LETTER O WITH CIRCUMFLEX
"Õ", // U+00D5: LATIN CAPITAL LETTER O WITH TILDE
"Ö", // U+00D6: LATIN CAPITAL LETTER O WITH DIAERESIS
"Ø", // U+00D8: LATIN CAPITAL LETTER O WITH STROKE
"Ō", // U+014C: LATIN CAPITAL LETTER O WITH MACRON
"Ŏ", // U+014E: LATIN CAPITAL LETTER O WITH BREVE
"Ő", // U+0150: LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
"Ɔ", // U+0186: LATIN CAPITAL LETTER OPEN O
"Ɵ", // U+019F: LATIN CAPITAL LETTER O WITH MIDDLE TILDE
"Ơ", // U+01A0: LATIN CAPITAL LETTER O WITH HORN
"Ǒ", // U+01D1: LATIN CAPITAL LETTER O WITH CARON
"Ǫ", // U+01EA: LATIN CAPITAL LETTER O WITH OGONEK
"Ǭ", // U+01EC: LATIN CAPITAL LETTER O WITH OGONEK AND MACRON
"Ǿ", // U+01FE: LATIN CAPITAL LETTER O WITH STROKE AND ACUTE
"Ȍ", // U+020C: LATIN CAPITAL LETTER O WITH DOUBLE GRAVE
"Ȏ", // U+020E: LATIN CAPITAL LETTER O WITH INVERTED BREVE
"Ȫ", // U+022A: LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON
"Ȭ", // U+022C: LATIN CAPITAL LETTER O WITH TILDE AND MACRON
"Ȯ", // U+022E: LATIN CAPITAL LETTER O WITH DOT ABOVE
"Ȱ", // U+0230: LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON
"ᴏ", // U+1D0F: LATIN LETTER SMALL CAPITAL O
"ᴐ", // U+1D10: LATIN LETTER SMALL CAPITAL OPEN O
"Ṍ", // U+1E4C: LATIN CAPITAL LETTER O WITH TILDE AND ACUTE
"Ṏ", // U+1E4E: LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS
"Ṑ", // U+1E50: LATIN CAPITAL LETTER O WITH MACRON AND GRAVE
"Ṓ", // U+1E52: LATIN CAPITAL LETTER O WITH MACRON AND ACUTE
"Ọ", // U+1ECC: LATIN CAPITAL LETTER O WITH DOT BELOW
"Ỏ", // U+1ECE: LATIN CAPITAL LETTER O WITH HOOK ABOVE
"Ố", // U+1ED0: LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE
"Ồ", // U+1ED2: LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE
"Ổ", // U+1ED4: LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
"Ỗ", // U+1ED6: LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE
"Ộ", // U+1ED8: LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW
"Ớ", // U+1EDA: LATIN CAPITAL LETTER O WITH HORN AND ACUTE
"Ờ", // U+1EDC: LATIN CAPITAL LETTER O WITH HORN AND GRAVE
"Ở", // U+1EDE: LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE
"Ỡ", // U+1EE0: LATIN CAPITAL LETTER O WITH HORN AND TILDE
"Ợ", // U+1EE2: LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW
"Ⓞ", // U+24C4: CIRCLED LATIN CAPITAL LETTER O
"Ꝋ", // U+A74A: LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY
"Ꝍ", // U+A74C: LATIN CAPITAL LETTER O WITH LOOP
"O", // U+FF2F: FULLWIDTH LATIN CAPITAL LETTER O
],
"O",
),
(
&[
"ò", // U+00F2: LATIN SMALL LETTER O WITH GRAVE
"ó", // U+00F3: LATIN SMALL LETTER O WITH ACUTE
"ô", // U+00F4: LATIN SMALL LETTER O WITH CIRCUMFLEX
"õ", // U+00F5: LATIN SMALL LETTER O WITH TILDE
"ö", // U+00F6: LATIN SMALL LETTER O WITH DIAERESIS
"ø", // U+00F8: LATIN SMALL LETTER O WITH STROKE
"ō", // U+014D: LATIN SMALL LETTER O WITH MACRON
"ŏ", // U+014F: LATIN SMALL LETTER O WITH BREVE
"ő", // U+0151: LATIN SMALL LETTER O WITH DOUBLE ACUTE
"ơ", // U+01A1: LATIN SMALL LETTER O WITH HORN
"ǒ", // U+01D2: LATIN SMALL LETTER O WITH CARON
"ǫ", // U+01EB: LATIN SMALL LETTER O WITH OGONEK
"ǭ", // U+01ED: LATIN SMALL LETTER O WITH OGONEK AND MACRON
"ǿ", // U+01FF: LATIN SMALL LETTER O WITH STROKE AND ACUTE
"ȍ", // U+020D: LATIN SMALL LETTER O WITH DOUBLE GRAVE
"ȏ", // U+020F: LATIN SMALL LETTER O WITH INVERTED BREVE
"ȫ", // U+022B: LATIN SMALL LETTER O WITH DIAERESIS AND MACRON
"ȭ", // U+022D: LATIN SMALL LETTER O WITH TILDE AND MACRON
"ȯ", // U+022F: LATIN SMALL LETTER O WITH DOT ABOVE
"ȱ", // U+0231: LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON
"ɔ", // U+0254: LATIN SMALL LETTER OPEN O
"ɵ", // U+0275: LATIN SMALL LETTER BARRED O
"ᴖ", // U+1D16: LATIN SMALL LETTER TOP HALF O
"ᴗ", // U+1D17: LATIN SMALL LETTER BOTTOM HALF O
"ᶗ", // U+1D97: LATIN SMALL LETTER OPEN O WITH RETROFLEX HOOK
"ṍ", // U+1E4D: LATIN SMALL LETTER O WITH TILDE AND ACUTE
"ṏ", // U+1E4F: LATIN SMALL LETTER O WITH TILDE AND DIAERESIS
"ṑ", // U+1E51: LATIN SMALL LETTER O WITH MACRON AND GRAVE
"ṓ", // U+1E53: LATIN SMALL LETTER O WITH MACRON AND ACUTE
"ọ", // U+1ECD: LATIN SMALL LETTER O WITH DOT BELOW
"ỏ", // U+1ECF: LATIN SMALL LETTER O WITH HOOK ABOVE
"ố", // U+1ED1: LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE
"ồ", // U+1ED3: LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE
"ổ", // U+1ED5: LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
"ỗ", // U+1ED7: LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE
"ộ", // U+1ED9: LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW
"ớ", // U+1EDB: LATIN SMALL LETTER O WITH HORN AND ACUTE
"ờ", // U+1EDD: LATIN SMALL LETTER O WITH HORN AND GRAVE
"ở", // U+1EDF: LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE
"ỡ", // U+1EE1: LATIN SMALL LETTER O WITH HORN AND TILDE
"ợ", // U+1EE3: LATIN SMALL LETTER O WITH HORN AND DOT BELOW
"ₒ", // U+2092: LATIN SUBSCRIPT SMALL LETTER O
"ⓞ", // U+24DE: CIRCLED LATIN SMALL LETTER O
"ⱺ", // U+2C7A: LATIN SMALL LETTER O WITH LOW RING INSIDE
"ꝋ", // U+A74B: LATIN SMALL LETTER O WITH LONG STROKE OVERLAY
"ꝍ", // U+A74D: LATIN SMALL LETTER O WITH LOOP
"o", // U+FF4F: FULLWIDTH LATIN SMALL LETTER O
],
"o",
),
(
&[
"Œ", // U+0152: LATIN CAPITAL LIGATURE OE
"ɶ", // U+0276: LATIN LETTER SMALL CAPITAL OE
],
"OE",
),
(
&[
"Ꝏ", // U+A74E: LATIN CAPITAL LETTER OO
],
"OO",
),
(
&[
"Ȣ", // U+0222: LATIN CAPITAL LETTER OU
"ᴕ", // U+1D15: LATIN LETTER SMALL CAPITAL OU
],
"OU",
),
(
&[
"⒪", // U+24AA: PARENTHESIZED LATIN SMALL LETTER O
],
"(o)",
),
(
&[
"œ", // U+0153: LATIN SMALL LIGATURE OE
"ᴔ", // U+1D14: LATIN SMALL LETTER TURNED OE
],
"oe",
),
(
&[
"ꝏ", // U+A74F: LATIN SMALL LETTER OO
],
"oo",
),
(
&[
"ȣ", // U+0223: LATIN SMALL LETTER OU
],
"ou",
),
(
&[
"Ƥ", // U+01A4: LATIN CAPITAL LETTER P WITH HOOK
"ᴘ", // U+1D18: LATIN LETTER SMALL CAPITAL P
"Ṕ", // U+1E54: LATIN CAPITAL LETTER P WITH ACUTE
"Ṗ", // U+1E56: LATIN CAPITAL LETTER P WITH DOT ABOVE
"Ⓟ", // U+24C5: CIRCLED LATIN CAPITAL LETTER P
"Ᵽ", // U+2C63: LATIN CAPITAL LETTER P WITH STROKE
"Ꝑ", // U+A750: LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER
"Ꝓ", // U+A752: LATIN CAPITAL LETTER P WITH FLOURISH
"Ꝕ", // U+A754: LATIN CAPITAL LETTER P WITH SQUIRREL TAIL
"P", // U+FF30: FULLWIDTH LATIN CAPITAL LETTER P
],
"P",
),
(
&[
"ƥ", // U+01A5: LATIN SMALL LETTER P WITH HOOK
"ᵱ", // U+1D71: LATIN SMALL LETTER P WITH MIDDLE TILDE
"ᵽ", // U+1D7D: LATIN SMALL LETTER P WITH STROKE
"ᶈ", // U+1D88: LATIN SMALL LETTER P WITH PALATAL HOOK
"ṕ", // U+1E55: LATIN SMALL LETTER P WITH ACUTE
"ṗ", // U+1E57: LATIN SMALL LETTER P WITH DOT ABOVE
"ⓟ", // U+24DF: CIRCLED LATIN SMALL LETTER P
"ꝑ", // U+A751: LATIN SMALL LETTER P WITH STROKE THROUGH DESCENDER
"ꝓ", // U+A753: LATIN SMALL LETTER P WITH FLOURISH
"ꝕ", // U+A755: LATIN SMALL LETTER P WITH SQUIRREL TAIL
"ꟼ", // U+A7FC: LATIN EPIGRAPHIC LETTER REVERSED P
"p", // U+FF50: FULLWIDTH LATIN SMALL LETTER P
],
"p",
),
(
&[
"⒫", // U+24AB: PARENTHESIZED LATIN SMALL LETTER P
],
"(p)",
),
(
&[
"Ɋ", // U+024A: LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL
"Ⓠ", // U+24C6: CIRCLED LATIN CAPITAL LETTER Q
"Ꝗ", // U+A756: LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER
"Ꝙ", // U+A758: LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE
"Q", // U+FF31: FULLWIDTH LATIN CAPITAL LETTER Q
],
"Q",
),
(
&[
"ĸ", // U+0138: LATIN SMALL LETTER KRA
"ɋ", // U+024B: LATIN SMALL LETTER Q WITH HOOK TAIL
"ʠ", // U+02A0: LATIN SMALL LETTER Q WITH HOOK
"ⓠ", // U+24E0: CIRCLED LATIN SMALL LETTER Q
"ꝗ", // U+A757: LATIN SMALL LETTER Q WITH STROKE THROUGH DESCENDER
"ꝙ", // U+A759: LATIN SMALL LETTER Q WITH DIAGONAL STROKE
"q", // U+FF51: FULLWIDTH LATIN SMALL LETTER Q
],
"q",
),
(
&[
"⒬", // U+24AC: PARENTHESIZED LATIN SMALL LETTER Q
],
"(q)",
),
(
&[
"ȹ", // U+0239: LATIN SMALL LETTER QP DIGRAPH
],
"qp",
),
(
&[
"Ŕ", // U+0154: LATIN CAPITAL LETTER R WITH ACUTE
"Ŗ", // U+0156: LATIN CAPITAL LETTER R WITH CEDILLA
"Ř", // U+0158: LATIN CAPITAL LETTER R WITH CARON
"Ȑ", // U+0210: LATIN CAPITAL LETTER R WITH DOUBLE GRAVE
"Ȓ", // U+0212: LATIN CAPITAL LETTER R WITH INVERTED BREVE
"Ɍ", // U+024C: LATIN CAPITAL LETTER R WITH STROKE
"ʀ", // U+0280: LATIN LETTER SMALL CAPITAL R
"ʁ", // U+0281: LATIN LETTER SMALL CAPITAL INVERTED R
"ᴙ", // U+1D19: LATIN LETTER SMALL CAPITAL REVERSED R
"ᴚ", // U+1D1A: LATIN LETTER SMALL CAPITAL TURNED R
"Ṙ", // U+1E58: LATIN CAPITAL LETTER R WITH DOT ABOVE
"Ṛ", // U+1E5A: LATIN CAPITAL LETTER R WITH DOT BELOW
"Ṝ", // U+1E5C: LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON
"Ṟ", // U+1E5E: LATIN CAPITAL LETTER R WITH LINE BELOW
"Ⓡ", // U+24C7: CIRCLED LATIN CAPITAL LETTER R
"Ɽ", // U+2C64: LATIN CAPITAL LETTER R WITH TAIL
"Ꝛ", // U+A75A: LATIN CAPITAL LETTER R ROTUNDA
"Ꞃ", // U+A782: LATIN CAPITAL LETTER INSULAR R
"R", // U+FF32: FULLWIDTH LATIN CAPITAL LETTER R
],
"R",
),
(
&[
"ŕ", // U+0155: LATIN SMALL LETTER R WITH ACUTE
"ŗ", // U+0157: LATIN SMALL LETTER R WITH CEDILLA
"ř", // U+0159: LATIN SMALL LETTER R WITH CARON
"ȑ", // U+0211: LATIN SMALL LETTER R WITH DOUBLE GRAVE
"ȓ", // U+0213: LATIN SMALL LETTER R WITH INVERTED BREVE
"ɍ", // U+024D: LATIN SMALL LETTER R WITH STROKE
"ɼ", // U+027C: LATIN SMALL LETTER R WITH LONG LEG
"ɽ", // U+027D: LATIN SMALL LETTER R WITH TAIL
"ɾ", // U+027E: LATIN SMALL LETTER R WITH FISHHOOK
"ɿ", // U+027F: LATIN SMALL LETTER REVERSED R WITH FISHHOOK
"ᵣ", // U+1D63: LATIN SUBSCRIPT SMALL LETTER R
"ᵲ", // U+1D72: LATIN SMALL LETTER R WITH MIDDLE TILDE
"ᵳ", // U+1D73: LATIN SMALL LETTER R WITH FISHHOOK AND MIDDLE TILDE
"ᶉ", // U+1D89: LATIN SMALL LETTER R WITH PALATAL HOOK
"ṙ", // U+1E59: LATIN SMALL LETTER R WITH DOT ABOVE
"ṛ", // U+1E5B: LATIN SMALL LETTER R WITH DOT BELOW
"ṝ", // U+1E5D: LATIN SMALL LETTER R WITH DOT BELOW AND MACRON
"ṟ", // U+1E5F: LATIN SMALL LETTER R WITH LINE BELOW
"ⓡ", // U+24E1: CIRCLED LATIN SMALL LETTER R
"ꝛ", // U+A75B: LATIN SMALL LETTER R ROTUNDA
"ꞃ", // U+A783: LATIN SMALL LETTER INSULAR R
"r", // U+FF52: FULLWIDTH LATIN SMALL LETTER R
],
"r",
),
(
&[
"⒭", // U+24AD: PARENTHESIZED LATIN SMALL LETTER R
],
"(r)",
),
(
&[
"Ś", // U+015A: LATIN CAPITAL LETTER S WITH ACUTE
"Ŝ", // U+015C: LATIN CAPITAL LETTER S WITH CIRCUMFLEX
"Ş", // U+015E: LATIN CAPITAL LETTER S WITH CEDILLA
"Š", // U+0160: LATIN CAPITAL LETTER S WITH CARON
"Ș", // U+0218: LATIN CAPITAL LETTER S WITH COMMA BELOW
"Ṡ", // U+1E60: LATIN CAPITAL LETTER S WITH DOT ABOVE
"Ṣ", // U+1E62: LATIN CAPITAL LETTER S WITH DOT BELOW
"Ṥ", // U+1E64: LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE
"Ṧ", // U+1E66: LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE
"Ṩ", // U+1E68: LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE
"Ⓢ", // U+24C8: CIRCLED LATIN CAPITAL LETTER S
"ꜱ", // U+A731: LATIN LETTER SMALL CAPITAL S
"ꞅ", // U+A785: LATIN SMALL LETTER INSULAR S
"S", // U+FF33: FULLWIDTH LATIN CAPITAL LETTER S
],
"S",
),
(
&[
"ś", // U+015B: LATIN SMALL LETTER S WITH ACUTE
"ŝ", // U+015D: LATIN SMALL LETTER S WITH CIRCUMFLEX
"ş", // U+015F: LATIN SMALL LETTER S WITH CEDILLA
"š", // U+0161: LATIN SMALL LETTER S WITH CARON
"ſ", // U+017F: LATIN SMALL LETTER LONG S
"ș", // U+0219: LATIN SMALL LETTER S WITH COMMA BELOW
"ȿ", // U+023F: LATIN SMALL LETTER S WITH SWASH TAIL
"ʂ", // U+0282: LATIN SMALL LETTER S WITH HOOK
"ᵴ", // U+1D74: LATIN SMALL LETTER S WITH MIDDLE TILDE
"ᶊ", // U+1D8A: LATIN SMALL LETTER S WITH PALATAL HOOK
"ṡ", // U+1E61: LATIN SMALL LETTER S WITH DOT ABOVE
"ṣ", // U+1E63: LATIN SMALL LETTER S WITH DOT BELOW
"ṥ", // U+1E65: LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE
"ṧ", // U+1E67: LATIN SMALL LETTER S WITH CARON AND DOT ABOVE
"ṩ", // U+1E69: LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE
"ẜ", // U+1E9C: LATIN SMALL LETTER LONG S WITH DIAGONAL STROKE
"ẝ", // U+1E9D: LATIN SMALL LETTER LONG S WITH HIGH STROKE
"ⓢ", // U+24E2: CIRCLED LATIN SMALL LETTER S
"Ꞅ", // U+A784: LATIN CAPITAL LETTER INSULAR S
"s", // U+FF53: FULLWIDTH LATIN SMALL LETTER S
],
"s",
),
(
&[
"ẞ", // U+1E9E: LATIN CAPITAL LETTER SHARP S
],
"SS",
),
(
&[
"⒮", // U+24AE: PARENTHESIZED LATIN SMALL LETTER S
],
"(s)",
),
(
&[
"ß", // U+00DF: LATIN SMALL LETTER SHARP S
],
"ss",
),
(
&[
"st", // U+FB06: LATIN SMALL LIGATURE ST
],
"st",
),
(
&[
"Ţ", // U+0162: LATIN CAPITAL LETTER T WITH CEDILLA
"Ť", // U+0164: LATIN CAPITAL LETTER T WITH CARON
"Ŧ", // U+0166: LATIN CAPITAL LETTER T WITH STROKE
"Ƭ", // U+01AC: LATIN CAPITAL LETTER T WITH HOOK
"Ʈ", // U+01AE: LATIN CAPITAL LETTER T WITH RETROFLEX HOOK
"Ț", // U+021A: LATIN CAPITAL LETTER T WITH COMMA BELOW
"Ⱦ", // U+023E: LATIN CAPITAL LETTER T WITH DIAGONAL STROKE
"ᴛ", // U+1D1B: LATIN LETTER SMALL CAPITAL T
"Ṫ", // U+1E6A: LATIN CAPITAL LETTER T WITH DOT ABOVE
"Ṭ", // U+1E6C: LATIN CAPITAL LETTER T WITH DOT BELOW
"Ṯ", // U+1E6E: LATIN CAPITAL LETTER T WITH LINE BELOW
"Ṱ", // U+1E70: LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW
"Ⓣ", // U+24C9: CIRCLED LATIN CAPITAL LETTER T
"Ꞇ", // U+A786: LATIN CAPITAL LETTER INSULAR T
"T", // U+FF34: FULLWIDTH LATIN CAPITAL LETTER T
],
"T",
),
(
&[
"ţ", // U+0163: LATIN SMALL LETTER T WITH CEDILLA
"ť", // U+0165: LATIN SMALL LETTER T WITH CARON
"ŧ", // U+0167: LATIN SMALL LETTER T WITH STROKE
"ƫ", // U+01AB: LATIN SMALL LETTER T WITH PALATAL HOOK
"ƭ", // U+01AD: LATIN SMALL LETTER T WITH HOOK
"ț", // U+021B: LATIN SMALL LETTER T WITH COMMA BELOW
"ȶ", // U+0236: LATIN SMALL LETTER T WITH CURL
"ʇ", // U+0287: LATIN SMALL LETTER TURNED T
"ʈ", // U+0288: LATIN SMALL LETTER T WITH RETROFLEX HOOK
"ᵵ", // U+1D75: LATIN SMALL LETTER T WITH MIDDLE TILDE
"ṫ", // U+1E6B: LATIN SMALL LETTER T WITH DOT ABOVE
"ṭ", // U+1E6D: LATIN SMALL LETTER T WITH DOT BELOW
"ṯ", // U+1E6F: LATIN SMALL LETTER T WITH LINE BELOW
"ṱ", // U+1E71: LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW
"ẗ", // U+1E97: LATIN SMALL LETTER T WITH DIAERESIS
"ⓣ", // U+24E3: CIRCLED LATIN SMALL LETTER T
"ⱦ", // U+2C66: LATIN SMALL LETTER T WITH DIAGONAL STROKE
"t", // U+FF54: FULLWIDTH LATIN SMALL LETTER T
],
"t",
),
(
&[
"Þ", // U+00DE: LATIN CAPITAL LETTER THORN
"Ꝧ", // U+A766: LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER
],
"TH",
),
(
&[
"Ꜩ", // U+A728: LATIN CAPITAL LETTER TZ
],
"TZ",
),
(
&[
"⒯", // U+24AF: PARENTHESIZED LATIN SMALL LETTER T
],
"(t)",
),
(
&[
"ʨ", // U+02A8: LATIN SMALL LETTER TC DIGRAPH WITH CURL
],
"tc",
),
(
&[
"þ", // U+00FE: LATIN SMALL LETTER THORN
"ᵺ", // U+1D7A: LATIN SMALL LETTER TH WITH STRIKETHROUGH
"ꝧ", // U+A767: LATIN SMALL LETTER THORN WITH STROKE THROUGH DESCENDER
],
"th",
),
(
&[
"ʦ", // U+02A6: LATIN SMALL LETTER TS DIGRAPH
],
"ts",
),
(
&[
"ꜩ", // U+A729: LATIN SMALL LETTER TZ
],
"tz",
),
(
&[
"Ù", // U+00D9: LATIN CAPITAL LETTER U WITH GRAVE
"Ú", // U+00DA: LATIN CAPITAL LETTER U WITH ACUTE
"Û", // U+00DB: LATIN CAPITAL LETTER U WITH CIRCUMFLEX
"Ü", // U+00DC: LATIN CAPITAL LETTER U WITH DIAERESIS
"Ũ", // U+0168: LATIN CAPITAL LETTER U WITH TILDE
"Ū", // U+016A: LATIN CAPITAL LETTER U WITH MACRON
"Ŭ", // U+016C: LATIN CAPITAL LETTER U WITH BREVE
"Ů", // U+016E: LATIN CAPITAL LETTER U WITH RING ABOVE
"Ű", // U+0170: LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
"Ų", // U+0172: LATIN CAPITAL LETTER U WITH OGONEK
"Ư", // U+01AF: LATIN CAPITAL LETTER U WITH HORN
"Ǔ", // U+01D3: LATIN CAPITAL LETTER U WITH CARON
"Ǖ", // U+01D5: LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
"Ǘ", // U+01D7: LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE
"Ǚ", // U+01D9: LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON
"Ǜ", // U+01DB: LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE
"Ȕ", // U+0214: LATIN CAPITAL LETTER U WITH DOUBLE GRAVE
"Ȗ", // U+0216: LATIN CAPITAL LETTER U WITH INVERTED BREVE
"Ʉ", // U+0244: LATIN CAPITAL LETTER U BAR
"ᴜ", // U+1D1C: LATIN LETTER SMALL CAPITAL U
"ᵾ", // U+1D7E: LATIN SMALL CAPITAL LETTER U WITH STROKE
"Ṳ", // U+1E72: LATIN CAPITAL LETTER U WITH DIAERESIS BELOW
"Ṵ", // U+1E74: LATIN CAPITAL LETTER U WITH TILDE BELOW
"Ṷ", // U+1E76: LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW
"Ṹ", // U+1E78: LATIN CAPITAL LETTER U WITH TILDE AND ACUTE
"Ṻ", // U+1E7A: LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS
"Ụ", // U+1EE4: LATIN CAPITAL LETTER U WITH DOT BELOW
"Ủ", // U+1EE6: LATIN CAPITAL LETTER U WITH HOOK ABOVE
"Ứ", // U+1EE8: LATIN CAPITAL LETTER U WITH HORN AND ACUTE
"Ừ", // U+1EEA: LATIN CAPITAL LETTER U WITH HORN AND GRAVE
"Ử", // U+1EEC: LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE
"Ữ", // U+1EEE: LATIN CAPITAL LETTER U WITH HORN AND TILDE
"Ự", // U+1EF0: LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW
"Ⓤ", // U+24CA: CIRCLED LATIN CAPITAL LETTER U
"U", // U+FF35: FULLWIDTH LATIN CAPITAL LETTER U
],
"U",
),
(
&[
"ù", // U+00F9: LATIN SMALL LETTER U WITH GRAVE
"ú", // U+00FA: LATIN SMALL LETTER U WITH ACUTE
"û", // U+00FB: LATIN SMALL LETTER U WITH CIRCUMFLEX
"ü", // U+00FC: LATIN SMALL LETTER U WITH DIAERESIS
"ũ", // U+0169: LATIN SMALL LETTER U WITH TILDE
"ū", // U+016B: LATIN SMALL LETTER U WITH MACRON
"ŭ", // U+016D: LATIN SMALL LETTER U WITH BREVE
"ů", // U+016F: LATIN SMALL LETTER U WITH RING ABOVE
"ű", // U+0171: LATIN SMALL LETTER U WITH DOUBLE ACUTE
"ų", // U+0173: LATIN SMALL LETTER U WITH OGONEK
"ư", // U+01B0: LATIN SMALL LETTER U WITH HORN
"ǔ", // U+01D4: LATIN SMALL LETTER U WITH CARON
"ǖ", // U+01D6: LATIN SMALL LETTER U WITH DIAERESIS AND MACRON
"ǘ", // U+01D8: LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE
"ǚ", // U+01DA: LATIN SMALL LETTER U WITH DIAERESIS AND CARON
"ǜ", // U+01DC: LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE
"ȕ", // U+0215: LATIN SMALL LETTER U WITH DOUBLE GRAVE
"ȗ", // U+0217: LATIN SMALL LETTER U WITH INVERTED BREVE
"ʉ", // U+0289: LATIN SMALL LETTER U BAR
"ᵤ", // U+1D64: LATIN SUBSCRIPT SMALL LETTER U
"ᶙ", // U+1D99: LATIN SMALL LETTER U WITH RETROFLEX HOOK
"ṳ", // U+1E73: LATIN SMALL LETTER U WITH DIAERESIS BELOW
"ṵ", // U+1E75: LATIN SMALL LETTER U WITH TILDE BELOW
"ṷ", // U+1E77: LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW
"ṹ", // U+1E79: LATIN SMALL LETTER U WITH TILDE AND ACUTE
"ṻ", // U+1E7B: LATIN SMALL LETTER U WITH MACRON AND DIAERESIS
"ụ", // U+1EE5: LATIN SMALL LETTER U WITH DOT BELOW
"ủ", // U+1EE7: LATIN SMALL LETTER U WITH HOOK ABOVE
"ứ", // U+1EE9: LATIN SMALL LETTER U WITH HORN AND ACUTE
"ừ", // U+1EEB: LATIN SMALL LETTER U WITH HORN AND GRAVE
"ử", // U+1EED: LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE
"ữ", // U+1EEF: LATIN SMALL LETTER U WITH HORN AND TILDE
"ự", // U+1EF1: LATIN SMALL LETTER U WITH HORN AND DOT BELOW
"ⓤ", // U+24E4: CIRCLED LATIN SMALL LETTER U
"u", // U+FF55: FULLWIDTH LATIN SMALL LETTER U
],
"u",
),
(
&[
"⒰", // U+24B0: PARENTHESIZED LATIN SMALL LETTER U
],
"(u)",
),
(
&[
"ᵫ", // U+1D6B: LATIN SMALL LETTER UE
],
"ue",
),
(
&[
"Ʋ", // U+01B2: LATIN CAPITAL LETTER V WITH HOOK
"Ʌ", // U+0245: LATIN CAPITAL LETTER TURNED V
"ᴠ", // U+1D20: LATIN LETTER SMALL CAPITAL V
"Ṽ", // U+1E7C: LATIN CAPITAL LETTER V WITH TILDE
"Ṿ", // U+1E7E: LATIN CAPITAL LETTER V WITH DOT BELOW
"Ỽ", // U+1EFC: LATIN CAPITAL LETTER MIDDLE-WELSH V
"Ⓥ", // U+24CB: CIRCLED LATIN CAPITAL LETTER V
"Ꝟ", // U+A75E: LATIN CAPITAL LETTER V WITH DIAGONAL STROKE
"Ꝩ", // U+A768: LATIN CAPITAL LETTER VEND
"V", // U+FF36: FULLWIDTH LATIN CAPITAL LETTER V
],
"V",
),
(
&[
"ʋ", // U+028B: LATIN SMALL LETTER V WITH HOOK
"ʌ", // U+028C: LATIN SMALL LETTER TURNED V
"ᵥ", // U+1D65: LATIN SUBSCRIPT SMALL LETTER V
"ᶌ", // U+1D8C: LATIN SMALL LETTER V WITH PALATAL HOOK
"ṽ", // U+1E7D: LATIN SMALL LETTER V WITH TILDE
"ṿ", // U+1E7F: LATIN SMALL LETTER V WITH DOT BELOW
"ⓥ", // U+24E5: CIRCLED LATIN SMALL LETTER V
"ⱱ", // U+2C71: LATIN SMALL LETTER V WITH RIGHT HOOK
"ⱴ", // U+2C74: LATIN SMALL LETTER V WITH CURL
"ꝟ", // U+A75F: LATIN SMALL LETTER V WITH DIAGONAL STROKE
"v", // U+FF56: FULLWIDTH LATIN SMALL LETTER V
],
"v",
),
(
&[
"Ꝡ", // U+A760: LATIN CAPITAL LETTER VY
],
"VY",
),
(
&[
"⒱", // U+24B1: PARENTHESIZED LATIN SMALL LETTER V
],
"(v)",
),
(
&[
"ꝡ", // U+A761: LATIN SMALL LETTER VY
],
"vy",
),
(
&[
"Ŵ", // U+0174: LATIN CAPITAL LETTER W WITH CIRCUMFLEX
"Ƿ", // U+01F7: LATIN CAPITAL LETTER WYNN
"ᴡ", // U+1D21: LATIN LETTER SMALL CAPITAL W
"Ẁ", // U+1E80: LATIN CAPITAL LETTER W WITH GRAVE
"Ẃ", // U+1E82: LATIN CAPITAL LETTER W WITH ACUTE
"Ẅ", // U+1E84: LATIN CAPITAL LETTER W WITH DIAERESIS
"Ẇ", // U+1E86: LATIN CAPITAL LETTER W WITH DOT ABOVE
"Ẉ", // U+1E88: LATIN CAPITAL LETTER W WITH DOT BELOW
"Ⓦ", // U+24CC: CIRCLED LATIN CAPITAL LETTER W
"Ⱳ", // U+2C72: LATIN CAPITAL LETTER W WITH HOOK
"W", // U+FF37: FULLWIDTH LATIN CAPITAL LETTER W
],
"W",
),
(
&[
"ŵ", // U+0175: LATIN SMALL LETTER W WITH CIRCUMFLEX
"ƿ", // U+01BF: LATIN LETTER WYNN
"ʍ", // U+028D: LATIN SMALL LETTER TURNED W
"ẁ", // U+1E81: LATIN SMALL LETTER W WITH GRAVE
"ẃ", // U+1E83: LATIN SMALL LETTER W WITH ACUTE
"ẅ", // U+1E85: LATIN SMALL LETTER W WITH DIAERESIS
"ẇ", // U+1E87: LATIN SMALL LETTER W WITH DOT ABOVE
"ẉ", // U+1E89: LATIN SMALL LETTER W WITH DOT BELOW
"ẘ", // U+1E98: LATIN SMALL LETTER W WITH RING ABOVE
"ⓦ", // U+24E6: CIRCLED LATIN SMALL LETTER W
"ⱳ", // U+2C73: LATIN SMALL LETTER W WITH HOOK
"w", // U+FF57: FULLWIDTH LATIN SMALL LETTER W
],
"w",
),
(
&[
"⒲", // U+24B2: PARENTHESIZED LATIN SMALL LETTER W
],
"(w)",
),
(
&[
"Ẋ", // U+1E8A: LATIN CAPITAL LETTER X WITH DOT ABOVE
"Ẍ", // U+1E8C: LATIN CAPITAL LETTER X WITH DIAERESIS
"Ⓧ", // U+24CD: CIRCLED LATIN CAPITAL LETTER X
"X", // U+FF38: FULLWIDTH LATIN CAPITAL LETTER X
],
"X",
),
(
&[
"ᶍ", // U+1D8D: LATIN SMALL LETTER X WITH PALATAL HOOK
"ẋ", // U+1E8B: LATIN SMALL LETTER X WITH DOT ABOVE
"ẍ", // U+1E8D: LATIN SMALL LETTER X WITH DIAERESIS
"ₓ", // U+2093: LATIN SUBSCRIPT SMALL LETTER X
"ⓧ", // U+24E7: CIRCLED LATIN SMALL LETTER X
"x", // U+FF58: FULLWIDTH LATIN SMALL LETTER X
],
"x",
),
(
&[
"⒳", // U+24B3: PARENTHESIZED LATIN SMALL LETTER X
],
"(x)",
),
(
&[
"Ý", // U+00DD: LATIN CAPITAL LETTER Y WITH ACUTE
"Ŷ", // U+0176: LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
"Ÿ", // U+0178: LATIN CAPITAL LETTER Y WITH DIAERESIS
"Ƴ", // U+01B3: LATIN CAPITAL LETTER Y WITH HOOK
"Ȳ", // U+0232: LATIN CAPITAL LETTER Y WITH MACRON
"Ɏ", // U+024E: LATIN CAPITAL LETTER Y WITH STROKE
"ʏ", // U+028F: LATIN LETTER SMALL CAPITAL Y
"Ẏ", // U+1E8E: LATIN CAPITAL LETTER Y WITH DOT ABOVE
"Ỳ", // U+1EF2: LATIN CAPITAL LETTER Y WITH GRAVE
"Ỵ", // U+1EF4: LATIN CAPITAL LETTER Y WITH DOT BELOW
"Ỷ", // U+1EF6: LATIN CAPITAL LETTER Y WITH HOOK ABOVE
"Ỹ", // U+1EF8: LATIN CAPITAL LETTER Y WITH TILDE
"Ỿ", // U+1EFE: LATIN CAPITAL LETTER Y WITH LOOP
"Ⓨ", // U+24CE: CIRCLED LATIN CAPITAL LETTER Y
"Y", // U+FF39: FULLWIDTH LATIN CAPITAL LETTER Y
],
"Y",
),
(
&[
"ý", // U+00FD: LATIN SMALL LETTER Y WITH ACUTE
"ÿ", // U+00FF: LATIN SMALL LETTER Y WITH DIAERESIS
"ŷ", // U+0177: LATIN SMALL LETTER Y WITH CIRCUMFLEX
"ƴ", // U+01B4: LATIN SMALL LETTER Y WITH HOOK
"ȳ", // U+0233: LATIN SMALL LETTER Y WITH MACRON
"ɏ", // U+024F: LATIN SMALL LETTER Y WITH STROKE
"ʎ", // U+028E: LATIN SMALL LETTER TURNED Y
"ẏ", // U+1E8F: LATIN SMALL LETTER Y WITH DOT ABOVE
"ẙ", // U+1E99: LATIN SMALL LETTER Y WITH RING ABOVE
"ỳ", // U+1EF3: LATIN SMALL LETTER Y WITH GRAVE
"ỵ", // U+1EF5: LATIN SMALL LETTER Y WITH DOT BELOW
"ỷ", // U+1EF7: LATIN SMALL LETTER Y WITH HOOK ABOVE
"ỹ", // U+1EF9: LATIN SMALL LETTER Y WITH TILDE
"ỿ", // U+1EFF: LATIN SMALL LETTER Y WITH LOOP
"ⓨ", // U+24E8: CIRCLED LATIN SMALL LETTER Y
"y", // U+FF59: FULLWIDTH LATIN SMALL LETTER Y
],
"y",
),
(
&[
"⒴", // U+24B4: PARENTHESIZED LATIN SMALL LETTER Y
],
"(y)",
),
(
&[
"Ź", // U+0179: LATIN CAPITAL LETTER Z WITH ACUTE
"Ż", // U+017B: LATIN CAPITAL LETTER Z WITH DOT ABOVE
"Ž", // U+017D: LATIN CAPITAL LETTER Z WITH CARON
"Ƶ", // U+01B5: LATIN CAPITAL LETTER Z WITH STROKE
"Ȝ", // U+021C: LATIN CAPITAL LETTER YOGH
"Ȥ", // U+0224: LATIN CAPITAL LETTER Z WITH HOOK
"ᴢ", // U+1D22: LATIN LETTER SMALL CAPITAL Z
"Ẑ", // U+1E90: LATIN CAPITAL LETTER Z WITH CIRCUMFLEX
"Ẓ", // U+1E92: LATIN CAPITAL LETTER Z WITH DOT BELOW
"Ẕ", // U+1E94: LATIN CAPITAL LETTER Z WITH LINE BELOW
"Ⓩ", // U+24CF: CIRCLED LATIN CAPITAL LETTER Z
"Ⱬ", // U+2C6B: LATIN CAPITAL LETTER Z WITH DESCENDER
"Ꝣ", // U+A762: LATIN CAPITAL LETTER VISIGOTHIC Z
"Z", // U+FF3A: FULLWIDTH LATIN CAPITAL LETTER Z
],
"Z",
),
(
&[
"ź", // U+017A: LATIN SMALL LETTER Z WITH ACUTE
"ż", // U+017C: LATIN SMALL LETTER Z WITH DOT ABOVE
"ž", // U+017E: LATIN SMALL LETTER Z WITH CARON
"ƶ", // U+01B6: LATIN SMALL LETTER Z WITH STROKE
"ȝ", // U+021D: LATIN SMALL LETTER YOGH
"ȥ", // U+0225: LATIN SMALL LETTER Z WITH HOOK
"ɀ", // U+0240: LATIN SMALL LETTER Z WITH SWASH TAIL
"ʐ", // U+0290: LATIN SMALL LETTER Z WITH RETROFLEX HOOK
"ʑ", // U+0291: LATIN SMALL LETTER Z WITH CURL
"ᵶ", // U+1D76: LATIN SMALL LETTER Z WITH MIDDLE TILDE
"ᶎ", // U+1D8E: LATIN SMALL LETTER Z WITH PALATAL HOOK
"ẑ", // U+1E91: LATIN SMALL LETTER Z WITH CIRCUMFLEX
"ẓ", // U+1E93: LATIN SMALL LETTER Z WITH DOT BELOW
"ẕ", // U+1E95: LATIN SMALL LETTER Z WITH LINE BELOW
"ⓩ", // U+24E9: CIRCLED LATIN SMALL LETTER Z
"ⱬ", // U+2C6C: LATIN SMALL LETTER Z WITH DESCENDER
"ꝣ", // U+A763: LATIN SMALL LETTER VISIGOTHIC Z
"z", // U+FF5A: FULLWIDTH LATIN SMALL LETTER Z
],
"z",
),
(
&[
"⒵", // U+24B5: PARENTHESIZED LATIN SMALL LETTER Z
],
"(z)",
),
(
&[
"⁰", // U+2070: SUPERSCRIPT ZERO
"₀", // U+2080: SUBSCRIPT ZERO
"⓪", // U+24EA: CIRCLED DIGIT ZERO
"⓿", // U+24FF: NEGATIVE CIRCLED DIGIT ZERO
"0", // U+FF10: FULLWIDTH DIGIT ZERO
],
"0",
),
(
&[
"¹", // U+00B9: SUPERSCRIPT ONE
"₁", // U+2081: SUBSCRIPT ONE
"①", // U+2460: CIRCLED DIGIT ONE
"⓵", // U+24F5: DOUBLE CIRCLED DIGIT ONE
"❶", // U+2776: DINGBAT NEGATIVE CIRCLED DIGIT ONE
"➀", // U+2780: DINGBAT CIRCLED SANS-SERIF DIGIT ONE
"➊", // U+278A: DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ONE
"1", // U+FF11: FULLWIDTH DIGIT ONE
],
"1",
),
(
&[
"⒈", // U+2488: DIGIT ONE FULL STOP
],
"1.",
),
(
&[
"⑴", // U+2474: PARENTHESIZED DIGIT ONE
],
"(1)",
),
(
&[
"²", // U+00B2: SUPERSCRIPT TWO
"₂", // U+2082: SUBSCRIPT TWO
"②", // U+2461: CIRCLED DIGIT TWO
"⓶", // U+24F6: DOUBLE CIRCLED DIGIT TWO
"❷", // U+2777: DINGBAT NEGATIVE CIRCLED DIGIT TWO
"➁", // U+2781: DINGBAT CIRCLED SANS-SERIF DIGIT TWO
"➋", // U+278B: DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT TWO
"2", // U+FF12: FULLWIDTH DIGIT TWO
],
"2",
),
(
&[
"⒉", // U+2489: DIGIT TWO FULL STOP
],
"2.",
),
(
&[
"⑵", // U+2475: PARENTHESIZED DIGIT TWO
],
"(2)",
),
(
&[
"³", // U+00B3: SUPERSCRIPT THREE
"₃", // U+2083: SUBSCRIPT THREE
"③", // U+2462: CIRCLED DIGIT THREE
"⓷", // U+24F7: DOUBLE CIRCLED DIGIT THREE
"❸", // U+2778: DINGBAT NEGATIVE CIRCLED DIGIT THREE
"➂", // U+2782: DINGBAT CIRCLED SANS-SERIF DIGIT THREE
"➌", // U+278C: DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT THREE
"3", // U+FF13: FULLWIDTH DIGIT THREE
],
"3",
),
(
&[
"⒊", // U+248A: DIGIT THREE FULL STOP
],
"3.",
),
(
&[
"⑶", // U+2476: PARENTHESIZED DIGIT THREE
],
"(3)",
),
(
&[
"⁴", // U+2074: SUPERSCRIPT FOUR
"₄", // U+2084: SUBSCRIPT FOUR
"④", // U+2463: CIRCLED DIGIT FOUR
"⓸", // U+24F8: DOUBLE CIRCLED DIGIT FOUR
"❹", // U+2779: DINGBAT NEGATIVE CIRCLED DIGIT FOUR
"➃", // U+2783: DINGBAT CIRCLED SANS-SERIF DIGIT FOUR
"➍", // U+278D: DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FOUR
"4", // U+FF14: FULLWIDTH DIGIT FOUR
],
"4",
),
(
&[
"⒋", // U+248B: DIGIT FOUR FULL STOP
],
"4.",
),
(
&[
"⑷", // U+2477: PARENTHESIZED DIGIT FOUR
],
"(4)",
),
(
&[
"⁵", // U+2075: SUPERSCRIPT FIVE
"₅", // U+2085: SUBSCRIPT FIVE
"⑤", // U+2464: CIRCLED DIGIT FIVE
"⓹", // U+24F9: DOUBLE CIRCLED DIGIT FIVE
"❺", // U+277A: DINGBAT NEGATIVE CIRCLED DIGIT FIVE
"➄", // U+2784: DINGBAT CIRCLED SANS-SERIF DIGIT FIVE
"➎", // U+278E: DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FIVE
"5", // U+FF15: FULLWIDTH DIGIT FIVE
],
"5",
),
(
&[
"⒌", // U+248C: DIGIT FIVE FULL STOP
],
"5.",
),
(
&[
"⑸", // U+2478: PARENTHESIZED DIGIT FIVE
],
"(5)",
),
(
&[
"⁶", // U+2076: SUPERSCRIPT SIX
"₆", // U+2086: SUBSCRIPT SIX
"⑥", // U+2465: CIRCLED DIGIT SIX
"⓺", // U+24FA: DOUBLE CIRCLED DIGIT SIX
"❻", // U+277B: DINGBAT NEGATIVE CIRCLED DIGIT SIX
"➅", // U+2785: DINGBAT CIRCLED SANS-SERIF DIGIT SIX
"➏", // U+278F: DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SIX
"6", // U+FF16: FULLWIDTH DIGIT SIX
],
"6",
),
(
&[
"⒍", // U+248D: DIGIT SIX FULL STOP
],
"6.",
),
(
&[
"⑹", // U+2479: PARENTHESIZED DIGIT SIX
],
"(6)",
),
(
&[
"⁷", // U+2077: SUPERSCRIPT SEVEN
"₇", // U+2087: SUBSCRIPT SEVEN
"⑦", // U+2466: CIRCLED DIGIT SEVEN
"⓻", // U+24FB: DOUBLE CIRCLED DIGIT SEVEN
"❼", // U+277C: DINGBAT NEGATIVE CIRCLED DIGIT SEVEN
"➆", // U+2786: DINGBAT CIRCLED SANS-SERIF DIGIT SEVEN
"➐", // U+2790: DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SEVEN
"7", // U+FF17: FULLWIDTH DIGIT SEVEN
],
"7",
),
(
&[
"⒎", // U+248E: DIGIT SEVEN FULL STOP
],
"7.",
),
(
&[
"⑺", // U+247A: PARENTHESIZED DIGIT SEVEN
],
"(7)",
),
(
&[
"⁸", // U+2078: SUPERSCRIPT EIGHT
"₈", // U+2088: SUBSCRIPT EIGHT
"⑧", // U+2467: CIRCLED DIGIT EIGHT
"⓼", // U+24FC: DOUBLE CIRCLED DIGIT EIGHT
"❽", // U+277D: DINGBAT NEGATIVE CIRCLED DIGIT EIGHT
"➇", // U+2787: DINGBAT CIRCLED SANS-SERIF DIGIT EIGHT
"➑", // U+2791: DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT EIGHT
"8", // U+FF18: FULLWIDTH DIGIT EIGHT
],
"8",
),
(
&[
"⒏", // U+248F: DIGIT EIGHT FULL STOP
],
"8.",
),
(
&[
"⑻", // U+247B: PARENTHESIZED DIGIT EIGHT
],
"(8)",
),
(
&[
"⁹", // U+2079: SUPERSCRIPT NINE
"₉", // U+2089: SUBSCRIPT NINE
"⑨", // U+2468: CIRCLED DIGIT NINE
"⓽", // U+24FD: DOUBLE CIRCLED DIGIT NINE
"❾", // U+277E: DINGBAT NEGATIVE CIRCLED DIGIT NINE
"➈", // U+2788: DINGBAT CIRCLED SANS-SERIF DIGIT NINE
"➒", // U+2792: DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT NINE
"9", // U+FF19: FULLWIDTH DIGIT NINE
],
"9",
),
(
&[
"⒐", // U+2490: DIGIT NINE FULL STOP
],
"9.",
),
(
&[
"⑼", // U+247C: PARENTHESIZED DIGIT NINE
],
"(9)",
),
(
&[
"⑩", // U+2469: CIRCLED NUMBER TEN
"⓾", // U+24FE: DOUBLE CIRCLED NUMBER TEN
"❿", // U+277F: DINGBAT NEGATIVE CIRCLED NUMBER TEN
"➉", // U+2789: DINGBAT CIRCLED SANS-SERIF NUMBER TEN
"➓", // U+2793: DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN
],
"10",
),
(
&[
"⒑", // U+2491: NUMBER TEN FULL STOP
],
"10.",
),
(
&[
"⑽", // U+247D: PARENTHESIZED NUMBER TEN
],
"(10)",
),
(
&[
"⑪", // U+246A: CIRCLED NUMBER ELEVEN
"⓫", // U+24EB: NEGATIVE CIRCLED NUMBER ELEVEN
],
"11",
),
(
&[
"⒒", // U+2492: NUMBER ELEVEN FULL STOP
],
"11.",
),
(
&[
"⑾", // U+247E: PARENTHESIZED NUMBER ELEVEN
],
"(11)",
),
(
&[
"⑫", // U+246B: CIRCLED NUMBER TWELVE
"⓬", // U+24EC: NEGATIVE CIRCLED NUMBER TWELVE
],
"12",
),
(
&[
"⒓", // U+2493: NUMBER TWELVE FULL STOP
],
"12.",
),
(
&[
"⑿", // U+247F: PARENTHESIZED NUMBER TWELVE
],
"(12)",
),
(
&[
"⑬", // U+246C: CIRCLED NUMBER THIRTEEN
"⓭", // U+24ED: NEGATIVE CIRCLED NUMBER THIRTEEN
],
"13",
),
(
&[
"⒔", // U+2494: NUMBER THIRTEEN FULL STOP
],
"13.",
),
(
&[
"⒀", // U+2480: PARENTHESIZED NUMBER THIRTEEN
],
"(13)",
),
(
&[
"⑭", // U+246D: CIRCLED NUMBER FOURTEEN
"⓮", // U+24EE: NEGATIVE CIRCLED NUMBER FOURTEEN
],
"14",
),
(
&[
"⒕", // U+2495: NUMBER FOURTEEN FULL STOP
],
"14.",
),
(
&[
"⒁", // U+2481: PARENTHESIZED NUMBER FOURTEEN
],
"(14)",
),
(
&[
"⑮", // U+246E: CIRCLED NUMBER FIFTEEN
"⓯", // U+24EF: NEGATIVE CIRCLED NUMBER FIFTEEN
],
"15",
),
(
&[
"⒖", // U+2496: NUMBER FIFTEEN FULL STOP
],
"15.",
),
(
&[
"⒂", // U+2482: PARENTHESIZED NUMBER FIFTEEN
],
"(15)",
),
(
&[
"⑯", // U+246F: CIRCLED NUMBER SIXTEEN
"⓰", // U+24F0: NEGATIVE CIRCLED NUMBER SIXTEEN
],
"16",
),
(
&[
"⒗", // U+2497: NUMBER SIXTEEN FULL STOP
],
"16.",
),
(
&[
"⒃", // U+2483: PARENTHESIZED NUMBER SIXTEEN
],
"(16)",
),
(
&[
"⑰", // U+2470: CIRCLED NUMBER SEVENTEEN
"⓱", // U+24F1: NEGATIVE CIRCLED NUMBER SEVENTEEN
],
"17",
),
(
&[
"⒘", // U+2498: NUMBER SEVENTEEN FULL STOP
],
"17.",
),
(
&[
"⒄", // U+2484: PARENTHESIZED NUMBER SEVENTEEN
],
"(17)",
),
(
&[
"⑱", // U+2471: CIRCLED NUMBER EIGHTEEN
"⓲", // U+24F2: NEGATIVE CIRCLED NUMBER EIGHTEEN
],
"18",
),
(
&[
"⒙", // U+2499: NUMBER EIGHTEEN FULL STOP
],
"18.",
),
(
&[
"⒅", // U+2485: PARENTHESIZED NUMBER EIGHTEEN
],
"(18)",
),
(
&[
"⑲", // U+2472: CIRCLED NUMBER NINETEEN
"⓳", // U+24F3: NEGATIVE CIRCLED NUMBER NINETEEN
],
"19",
),
(
&[
"⒚", // U+249A: NUMBER NINETEEN FULL STOP
],
"19.",
),
(
&[
"⒆", // U+2486: PARENTHESIZED NUMBER NINETEEN
],
"(19)",
),
(
&[
"⑳", // U+2473: CIRCLED NUMBER TWENTY
"⓴", // U+24F4: NEGATIVE CIRCLED NUMBER TWENTY
],
"20",
),
(
&[
"⒛", // U+249B: NUMBER TWENTY FULL STOP
],
"20.",
),
(
&[
"⒇", // U+2487: PARENTHESIZED NUMBER TWENTY
],
"(20)",
),
(
&[
"«", // U+00AB: LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
"»", // U+00BB: RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
"“", // U+201C: LEFT DOUBLE QUOTATION MARK
"”", // U+201D: RIGHT DOUBLE QUOTATION MARK
"„", // U+201E: DOUBLE LOW-9 QUOTATION MARK
"″", // U+2033: DOUBLE PRIME
"‶", // U+2036: REVERSED DOUBLE PRIME
"❝", // U+275D: HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT
"❞", // U+275E: HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT
"❮", // U+276E: HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT
"❯", // U+276F: HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT
""", // U+FF02: FULLWIDTH QUOTATION MARK
],
"\"",
),
(
&[
"‘", // U+2018: LEFT SINGLE QUOTATION MARK
"’", // U+2019: RIGHT SINGLE QUOTATION MARK
"‚", // U+201A: SINGLE LOW-9 QUOTATION MARK
"‛", // U+201B: SINGLE HIGH-REVERSED-9 QUOTATION MARK
"′", // U+2032: PRIME
"‵", // U+2035: REVERSED PRIME
"‹", // U+2039: SINGLE LEFT-POINTING ANGLE QUOTATION MARK
"›", // U+203A: SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
"❛", // U+275B: HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT
"❜", // U+275C: HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT
"'", // U+FF07: FULLWIDTH APOSTROPHE
],
"'",
),
(
&[
"‐", // U+2010: HYPHEN
"‑", // U+2011: NON-BREAKING HYPHEN
"‒", // U+2012: FIGURE DASH
"–", // U+2013: EN DASH
"—", // U+2014: EM DASH
"⁻", // U+207B: SUPERSCRIPT MINUS
"₋", // U+208B: SUBSCRIPT MINUS
"-", // U+FF0D: FULLWIDTH HYPHEN-MINUS
],
"-",
),
(
&[
"⁅", // U+2045: LEFT SQUARE BRACKET WITH QUILL
"❲", // U+2772: LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT
"[", // U+FF3B: FULLWIDTH LEFT SQUARE BRACKET
],
"[",
),
(
&[
"⁆", // U+2046: RIGHT SQUARE BRACKET WITH QUILL
"❳", // U+2773: LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT
"]", // U+FF3D: FULLWIDTH RIGHT SQUARE BRACKET
],
"]",
),
(
&[
"⁽", // U+207D: SUPERSCRIPT LEFT PARENTHESIS
"₍", // U+208D: SUBSCRIPT LEFT PARENTHESIS
"❨", // U+2768: MEDIUM LEFT PARENTHESIS ORNAMENT
"❪", // U+276A: MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT
"(", // U+FF08: FULLWIDTH LEFT PARENTHESIS
],
"(",
),
(
&[
"⸨", // U+2E28: LEFT DOUBLE PARENTHESIS
],
"((",
),
(
&[
"⁾", // U+207E: SUPERSCRIPT RIGHT PARENTHESIS
"₎", // U+208E: SUBSCRIPT RIGHT PARENTHESIS
"❩", // U+2769: MEDIUM RIGHT PARENTHESIS ORNAMENT
"❫", // U+276B: MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT
")", // U+FF09: FULLWIDTH RIGHT PARENTHESIS
],
")",
),
(
&[
"⸩", // U+2E29: RIGHT DOUBLE PARENTHESIS
],
"))",
),
(
&[
"❬", // U+276C: MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT
"❰", // U+2770: HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT
"<", // U+FF1C: FULLWIDTH LESS-THAN SIGN
],
"<",
),
(
&[
"❭", // U+276D: MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT
"❱", // U+2771: HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT
">", // U+FF1E: FULLWIDTH GREATER-THAN SIGN
],
">",
),
(
&[
"❴", // U+2774: MEDIUM LEFT CURLY BRACKET ORNAMENT
"{", // U+FF5B: FULLWIDTH LEFT CURLY BRACKET
],
"{",
),
(
&[
"❵", // U+2775: MEDIUM RIGHT CURLY BRACKET ORNAMENT
"}", // U+FF5D: FULLWIDTH RIGHT CURLY BRACKET
],
"}",
),
(
&[
"⁺", // U+207A: SUPERSCRIPT PLUS SIGN
"₊", // U+208A: SUBSCRIPT PLUS SIGN
"+", // U+FF0B: FULLWIDTH PLUS SIGN
],
"+",
),
(
&[
"⁼", // U+207C: SUPERSCRIPT EQUALS SIGN
"₌", // U+208C: SUBSCRIPT EQUALS SIGN
"=", // U+FF1D: FULLWIDTH EQUALS SIGN
],
"=",
),
(
&[
"!", // U+FF01: FULLWIDTH EXCLAMATION MARK
],
"!",
),
(
&[
"‼", // U+203C: DOUBLE EXCLAMATION MARK
],
"!!",
),
(
&[
"⁉", // U+2049: EXCLAMATION QUESTION MARK
],
"!?",
),
(
&[
"#", // U+FF03: FULLWIDTH NUMBER SIGN
],
"#",
),
(
&[
"$", // U+FF04: FULLWIDTH DOLLAR SIGN
],
"$",
),
(
&[
"⁒", // U+2052: COMMERCIAL MINUS SIGN
"%", // U+FF05: FULLWIDTH PERCENT SIGN
],
"%",
),
(
&[
"&", // U+FF06: FULLWIDTH AMPERSAND
],
"&",
),
(
&[
"⁎", // U+204E: LOW ASTERISK
"*", // U+FF0A: FULLWIDTH ASTERISK
],
"*",
),
(
&[
",", // U+FF0C: FULLWIDTH COMMA
],
",",
),
(
&[
".", // U+FF0E: FULLWIDTH FULL STOP
],
".",
),
(
&[
"⁄", // U+2044: FRACTION SLASH
"/", // U+FF0F: FULLWIDTH SOLIDUS
],
"/",
),
(
&[
":", // U+FF1A: FULLWIDTH COLON
],
":",
),
(
&[
"⁏", // U+204F: REVERSED SEMICOLON
";", // U+FF1B: FULLWIDTH SEMICOLON
],
";",
),
(
&[
"?", // U+FF1F: FULLWIDTH QUESTION MARK
],
"?",
),
(
&[
"⁇", // U+2047: DOUBLE QUESTION MARK
],
"??",
),
(
&[
"⁈", // U+2048: QUESTION EXCLAMATION MARK
],
"?!",
),
(
&[
"@", // U+FF20: FULLWIDTH COMMERCIAL AT
],
"@",
),
(
&[
"\", // U+FF3C: FULLWIDTH REVERSE SOLIDUS
],
"\\",
),
(
&[
"‸", // U+2038: CARET
"^", // U+FF3E: FULLWIDTH CIRCUMFLEX ACCENT
],
"^",
),
(
&[
"_", // U+FF3F: FULLWIDTH LOW LINE
],
"_",
),
(
&[
"⁓", // U+2053: SWUNG DASH
"~", // U+FF5E: FULLWIDTH TILDE
],
"~",
),
];
for (characters, folded) in foldings {
for &c in characters {
assert_eq!(
folding_using_raw_tokenizer_helper(c),
folded,
"testing that character \"{}\" becomes \"{}\"",
c,
folded
);
}
}
}