in src/Editor/Text/Def/Internal/TextData/UnicodeWordExtent.cs [467:592]
/// <summary>
/// Classifies a single UTF-16 code unit by the code-point range it falls in and returns the
/// <see cref="UnicodeScript"/> this table assigns to that range.
/// </summary>
/// <param name="ch">The UTF-16 code unit to classify.</param>
/// <returns>
/// The script assigned to the range containing <paramref name="ch"/>, or
/// <see cref="UnicodeScript.NONE"/> for every range the table leaves unassigned.
/// </returns>
/// <remarks>
/// The lookup is a hand-written decision tree: the outer comparisons (0x2000, 0x1000, 0xD800)
/// split the BMP into coarse regions, and each region is then scanned with ascending upper-bound
/// checks, so the order of the comparisons is significant. Each trailing comment gives the
/// inclusive "first, last, block name" range that the comparison covers. Surrogate code units
/// (0xD800-0xDFFF) are classified individually and always yield NONE.
/// </remarks>
public static UnicodeScript UScript(char ch)
{
    if (ch <= 0x024F) return UnicodeScript.LATIN;               // 0x0000, 0x007F, Basic Latin
                                                                // 0x0080, 0x00FF, Latin-1 Supplement
                                                                // 0x0100, 0x017F, Latin Extended-A
                                                                // 0x0180, 0x024F, Latin Extended-B
    if (ch < 0x2000)
    {
        if (ch < 0x1000)
        {
            if (ch < 0x0370) return UnicodeScript.NONE;         // 0x0250, 0x02AF, IPA Extensions
                                                                // 0x02B0, 0x02FF, Spacing Modifier Letters
                                                                // 0x0300, 0x036F, Combining Diacritical Marks
            if (ch < 0x0400) return UnicodeScript.GREEK;        // 0x0370, 0x03FF, Greek
            if (ch <= 0x04FF) return UnicodeScript.CYRILLIC;    // 0x0400, 0x04FF, Cyrillic
            if (ch < 0x0530) return UnicodeScript.NONE;         // 0x0500, 0x052F, NONE
            if (ch < 0x0590) return UnicodeScript.ARMENIAN;     // 0x0530, 0x058F, Armenian
            if (ch < 0x0600) return UnicodeScript.HEBREW;       // 0x0590, 0x05FF, Hebrew
            if (ch < 0x0700) return UnicodeScript.ARABIC;       // 0x0600, 0x06FF, Arabic
            if (ch <= 0x074F) return UnicodeScript.SYRIAC;      // 0x0700, 0x074F, Syriac
            if (ch < 0x0780) return UnicodeScript.NONE;         // 0x0750, 0x077F, NONE
            if (ch <= 0x07BF) return UnicodeScript.THAANA;      // 0x0780, 0x07BF, Thaana
            if (ch < 0x0900) return UnicodeScript.NONE;         // 0x07C0, 0x08FF, NONE
            if (ch < 0x0980) return UnicodeScript.DEVANAGARI;   // 0x0900, 0x097F, Devanagari
            if (ch < 0x0A00) return UnicodeScript.BANGLA;       // 0x0980, 0x09FF, Bangla
            if (ch < 0x0A80) return UnicodeScript.GURMUKHI;     // 0x0A00, 0x0A7F, Gurmukhi
            if (ch < 0x0B00) return UnicodeScript.GUJARATI;     // 0x0A80, 0x0AFF, Gujarati
            if (ch < 0x0B80) return UnicodeScript.ODIA;         // 0x0B00, 0x0B7F, Odia
            if (ch < 0x0C00) return UnicodeScript.TAMIL;        // 0x0B80, 0x0BFF, Tamil
            if (ch < 0x0C80) return UnicodeScript.TELUGU;       // 0x0C00, 0x0C7F, Telugu
            if (ch < 0x0D00) return UnicodeScript.KANNADA;      // 0x0C80, 0x0CFF, Kannada
            if (ch < 0x0D80) return UnicodeScript.MALAYALAM;    // 0x0D00, 0x0D7F, Malayalam
            if (ch < 0x0E00) return UnicodeScript.SINHALA;      // 0x0D80, 0x0DFF, Sinhala
            if (ch < 0x0E80) return UnicodeScript.THAI;         // 0x0E00, 0x0E7F, Thai
            if (ch < 0x0F00) return UnicodeScript.LAO;          // 0x0E80, 0x0EFF, Lao
            return UnicodeScript.TIBETAN;                       // 0x0F00, 0x0FFF, Tibetan
        }
        else
        {
            if (ch < 0x10A0) return UnicodeScript.MYANMAR;      // 0x1000, 0x109F, Myanmar
            if (ch < 0x1100) return UnicodeScript.GEORGIAN;     // 0x10A0, 0x10FF, Georgian
            if (ch < 0x1200) return UnicodeScript.CJK;          // 0x1100, 0x11FF, Hangul Jamo
            if (ch < 0x13A0) return UnicodeScript.ETHIOPIC;     // 0x1200, 0x139F, Ethiopic
            if (ch < 0x1400) return UnicodeScript.CHEROKEE;     // 0x13A0, 0x13FF, Cherokee
            if (ch < 0x1680) return UnicodeScript.CANADIAN_ABORIGINAL; // 0x1400, 0x167F, Unified Canadian Aboriginal Syllabics
            if (ch < 0x16A0) return UnicodeScript.OGHAM;        // 0x1680, 0x169F, Ogham
            if (ch < 0x1780) return UnicodeScript.RUNIC;        // 0x16A0, 0x177F, Runic
            if (ch < 0x1800) return UnicodeScript.KHMER;        // 0x1780, 0x17FF, Khmer
            if (ch <= 0x18AF) return UnicodeScript.MONGOLIAN;   // 0x1800, 0x18AF, Mongolian
            if (ch < 0x1E00) return UnicodeScript.NONE;         // 0x18B0, 0x1DFF, NONE
            if (ch < 0x1F00) return UnicodeScript.LATIN;        // 0x1E00, 0x1EFF, Latin Extended Additional
            return UnicodeScript.GREEK;                         // 0x1F00, 0x1FFF, Greek Extended
        }
    }

    if (ch < 0xD800)
    {
        if (ch <= 0x27FF) return UnicodeScript.NONE;            // 0x2000, 0x206F, General Punctuation
                                                                // 0x2070, 0x209F, Superscripts and Subscripts
                                                                // 0x20A0, 0x20CF, Currency Symbols
                                                                // 0x20D0, 0x20FF, Combining Marks for Symbols
                                                                // 0x2100, 0x214F, Letterlike Symbols
                                                                // 0x2150, 0x218F, Number Forms
                                                                // 0x2190, 0x21FF, Arrows
                                                                // 0x2200, 0x22FF, Mathematical Operators
                                                                // 0x2300, 0x23FF, Miscellaneous Technical
                                                                // 0x2400, 0x243F, Control Pictures
                                                                // 0x2440, 0x245F, Optical Character Recognition
                                                                // 0x2460, 0x24FF, Enclosed Alphanumerics
                                                                // 0x2500, 0x257F, Box Drawing
                                                                // 0x2580, 0x259F, Block Elements
                                                                // 0x25A0, 0x25FF, Geometric Shapes
                                                                // 0x2600, 0x26FF, Miscellaneous Symbols
                                                                // 0x2700, 0x27BF, Dingbats
                                                                // 0x27C0, 0x27FF, NONE
        if (ch <= 0x28FF) return UnicodeScript.BRAILLE;         // 0x2800, 0x28FF, Braille Patterns
        if (ch < 0x2E80) return UnicodeScript.NONE;             // 0x2900, 0x2E7F, NONE
        if (ch <= 0x31BF) return UnicodeScript.CJK;             // 0x2E80, 0x2EFF, CJK Radicals Supplement
                                                                // 0x2F00, 0x2FDF, Kangxi Radicals
                                                                // 0x2FF0, 0x2FFF, Ideographic Description Characters
                                                                // 0x3000, 0x303F, CJK Symbols and Punctuation
                                                                // 0x3040, 0x309F, Hiragana
                                                                // 0x30A0, 0x30FF, Katakana
                                                                // 0x3100, 0x312F, Bopomofo
                                                                // 0x3130, 0x318F, Hangul Compatibility Jamo
                                                                // 0x3190, 0x319F, Kanbun
                                                                // 0x31A0, 0x31BF, Bopomofo Extended
        if (ch < 0x3200) return UnicodeScript.NONE;             // 0x31C0, 0x31FF, NONE
        if (ch <= 0x4DBF) return UnicodeScript.CJK;             // 0x3200, 0x32FF, Enclosed CJK Letters and Months
                                                                // 0x3300, 0x33FF, CJK Compatibility
                                                                // 0x3400, 0x4DBF, CJK Unified Ideographs Extension A
        if (ch < 0x4E00) return UnicodeScript.NONE;             // 0x4DC0, 0x4DFF, NONE
        if (ch <= 0x9FFF) return UnicodeScript.CJK;             // 0x4E00, 0x9FFF, CJK Unified Ideographs
        if (ch <= 0xA4CF) return UnicodeScript.YI;              // 0xA000, 0xA48F, Yi Syllables
                                                                // 0xA490, 0xA4CF, Yi Radicals
        if (ch < 0xAC00) return UnicodeScript.NONE;             // 0xA4D0, 0xABFF, NONE
        if (ch <= 0xD7A3) return UnicodeScript.CJK;             // 0xAC00, 0xD7A3, Hangul Syllables
        return UnicodeScript.NONE;                              // 0xD7A4, 0xD7FF, NONE
    }

    if (ch < 0xF900) return UnicodeScript.NONE;                 // 0xD800, 0xDB7F, High Surrogates
                                                                // 0xDB80, 0xDBFF, High Private Use Surrogates
                                                                // 0xDC00, 0xDFFF, Low Surrogates
                                                                // 0xE000, 0xF8FF, Private Use
    if (ch < 0xFB00) return UnicodeScript.CJK;                  // 0xF900, 0xFAFF, CJK Compatibility Ideographs

    // The bound is exclusive, so it must be 0xFB50 (not 0xFB4F) for U+FB4F, the last code point
    // of Alphabetic Presentation Forms, to stay in this range instead of falling into ARABIC.
    if (ch < 0xFB50) return UnicodeScript.LATIN;                // 0xFB00, 0xFB4F, Alphabetic Presentation Forms
    if (ch < 0xFE00) return UnicodeScript.ARABIC;               // 0xFB50, 0xFDFF, Arabic Presentation Forms-A
    if (ch < 0xFE30) return UnicodeScript.NONE;                 // 0xFE00, 0xFE1F, NONE
                                                                // 0xFE20, 0xFE2F, Combining Half Marks
    if (ch < 0xFE50) return UnicodeScript.CJK;                  // 0xFE30, 0xFE4F, CJK Compatibility Forms
    if (ch < 0xFE70) return UnicodeScript.NONE;                 // 0xFE50, 0xFE6F, Small Form Variants
    if (ch < 0xFEFF) return UnicodeScript.ARABIC;               // 0xFE70, 0xFEFE, Arabic Presentation Forms-B
    if (ch < 0xFFF0)
    {
        // Here ch is in 0xFEFF-0xFFEF.
        if (ch == 0xFEFF) return UnicodeScript.NONE;            // 0xFEFF, 0xFEFF, Specials
                                                                // 0xFF00, 0xFF00, Halfwidth and Fullwidth Forms (falls through to CJK)
        if ((ch >= 0xFF01) && (ch <= 0xFF5E))
            return UnicodeScript.LATIN;                         // 0xFF01, 0xFF5E, LATIN
        return UnicodeScript.CJK;                               // 0xFF5F, 0xFFEF, Halfwidth and Fullwidth Forms
    }

    return UnicodeScript.NONE;                                  // 0xFFF0, 0xFFFD, Specials
                                                                // 0xFFFE, 0xFFFF, NONE
}