CharacterType InitialsGenerator::GetCharacterType()

in dev/PersonPicture/InitialsGenerator.cpp [292:584]


// Classifies a single wide character according to the Unicode block that contains it.
//
// The lookup is an explicit allow list: only the blocks enumerated in the table below
// are recognised, and any character outside all of them is reported as
// CharacterType::Other. Block boundaries are the inclusive hexadecimal ranges published
// in the Unicode code charts (http://www.unicode.org/charts/); they are assigned by
// Unicode and are not expected to change.
//
// Parameters:
//   character - the wide character to classify.
//
// Returns:
//   CharacterType::Glyph, CharacterType::Symbolic or CharacterType::Standard for a
//   character inside one of the listed blocks; CharacterType::Other otherwise
//   (this includes the NUL character, which is deliberately left out of Basic Latin).
//
// NOTE: on platforms where wchar_t is 16 bits wide, a single wchar_t cannot hold a value
// above 0xFFFF, so the supplementary-plane rows (0x20000 and up) can never match there.
CharacterType InitialsGenerator::GetCharacterType(wchar_t character)
{
    // One inclusive [first, last] code point range together with the category it maps to.
    struct UnicodeBlock
    {
        long long first;
        long long last;
        CharacterType type;
    };

    // The rows are mutually disjoint, so a character matches at most one of them.
    static constexpr UnicodeBlock allowedBlocks[] = {
        // ---- Glyph ----
        { 0x0250, 0x02AF, CharacterType::Glyph },      // IPA Extensions
        { 0x0600, 0x06FF, CharacterType::Glyph },      // Arabic
        { 0x0750, 0x077F, CharacterType::Glyph },      // Arabic Supplement
        { 0x08A0, 0x08FF, CharacterType::Glyph },      // Arabic Extended-A
        { 0xFB50, 0xFDFF, CharacterType::Glyph },      // Arabic Presentation Forms-A
        { 0xFE70, 0xFEFF, CharacterType::Glyph },      // Arabic Presentation Forms-B
        { 0x0900, 0x097F, CharacterType::Glyph },      // Devanagari
        { 0xA8E0, 0xA8FF, CharacterType::Glyph },      // Devanagari Extended
        { 0x0980, 0x09FF, CharacterType::Glyph },      // Bengali
        { 0x0A00, 0x0A7F, CharacterType::Glyph },      // Gurmukhi
        { 0x0A80, 0x0AFF, CharacterType::Glyph },      // Gujarati
        { 0x0B00, 0x0B7F, CharacterType::Glyph },      // Oriya
        { 0x0B80, 0x0BFF, CharacterType::Glyph },      // Tamil
        { 0x0C00, 0x0C7F, CharacterType::Glyph },      // Telugu
        { 0x0C80, 0x0CFF, CharacterType::Glyph },      // Kannada
        { 0x0D00, 0x0D7F, CharacterType::Glyph },      // Malayalam
        { 0x0D80, 0x0DFF, CharacterType::Glyph },      // Sinhala
        { 0x0E00, 0x0E7F, CharacterType::Glyph },      // Thai
        { 0x0E80, 0x0EFF, CharacterType::Glyph },      // Lao

        // ---- Symbolic ----
        { 0x4E00, 0x9FFF, CharacterType::Symbolic },   // CJK Unified Ideographs
        { 0x3400, 0x4DBF, CharacterType::Symbolic },   // CJK Unified Ideographs Extension A
        { 0x20000, 0x2A6DF, CharacterType::Symbolic }, // CJK Unified Ideographs Extension B
        { 0x2A700, 0x2B73F, CharacterType::Symbolic }, // CJK Unified Ideographs Extension C
        { 0x2B740, 0x2B81F, CharacterType::Symbolic }, // CJK Unified Ideographs Extension D
        { 0x2E80, 0x2EFF, CharacterType::Symbolic },   // CJK Radicals Supplement
        { 0x3000, 0x303F, CharacterType::Symbolic },   // CJK Symbols and Punctuation
        { 0x31C0, 0x31EF, CharacterType::Symbolic },   // CJK Strokes
        { 0x3200, 0x32FF, CharacterType::Symbolic },   // Enclosed CJK Letters and Months
        { 0x3300, 0x33FF, CharacterType::Symbolic },   // CJK Compatibility
        { 0xF900, 0xFAFF, CharacterType::Symbolic },   // CJK Compatibility Ideographs
        { 0xFE30, 0xFE4F, CharacterType::Symbolic },   // CJK Compatibility Forms
        { 0x2F800, 0x2FA1F, CharacterType::Symbolic }, // CJK Compatibility Ideographs Supplement
        { 0x0370, 0x03FF, CharacterType::Symbolic },   // Greek and Coptic
        { 0x0590, 0x05FF, CharacterType::Symbolic },   // Hebrew
        { 0x0530, 0x058F, CharacterType::Symbolic },   // Armenian

        // ---- Standard (Latin, Cyrillic and combining marks) ----
        { 0x0001, 0x007F, CharacterType::Standard },   // Basic Latin (NUL intentionally excluded)
        { 0x0080, 0x00FF, CharacterType::Standard },   // Latin-1 Supplement
        { 0x0100, 0x017F, CharacterType::Standard },   // Latin Extended-A
        { 0x0180, 0x024F, CharacterType::Standard },   // Latin Extended-B
        { 0x2C60, 0x2C7F, CharacterType::Standard },   // Latin Extended-C
        { 0xA720, 0xA7FF, CharacterType::Standard },   // Latin Extended-D
        { 0xAB30, 0xAB6F, CharacterType::Standard },   // Latin Extended-E
        { 0x1E00, 0x1EFF, CharacterType::Standard },   // Latin Extended Additional
        { 0x0400, 0x04FF, CharacterType::Standard },   // Cyrillic
        { 0x0500, 0x052F, CharacterType::Standard },   // Cyrillic Supplement
        { 0x0300, 0x036F, CharacterType::Standard },   // Combining Diacritical Marks
    };

    // Widen once so every bound comparison is done on the character's exact value,
    // independent of whether wchar_t is a signed or unsigned type on this platform.
    const long long codePoint = character;

    for (const UnicodeBlock& block : allowedBlocks)
    {
        const bool insideBlock = (codePoint >= block.first) && (codePoint <= block.last);
        if (insideBlock)
        {
            return block.type;
        }
    }

    // Not part of any allowed block.
    return CharacterType::Other;
}