in src/Editor/Text/Def/Internal/TextData/UnicodeWordExtent.cs [299:358]
/// <summary>
/// Decides whether a break falls between two adjacent characters,
/// <paramref name="cL"/> on the left and <paramref name="cR"/> on the right.
/// </summary>
/// <param name="cL">The character on the left side of the candidate boundary.</param>
/// <param name="cR">The character on the right side of the candidate boundary.</param>
/// <param name="fHanWordBreak">
/// When true, a break is reported whenever either character is an ideograph
/// (as reported by IsIdeograph) and at least one side is in the CJK script.
/// </param>
/// <returns>true if a break is placed between the two characters; otherwise false.</returns>
public static bool IsPropBreak(char cL, char cR, bool fHanWordBreak)
{
    // A combining character on the right never starts a new segment.
    UnicodeCategory rightCategory = Char.GetUnicodeCategory(cR);
    if (IsPropCombining(rightCategory))
    {
        return false;
    }

    // Rule 1 (Para //) usually does not occur (no line/para bounds in the text
    // being searched) or is caught by rules 2 and 3.
    // Rule 2: !Let // Let
    // Rule 3: Let // !Let
    // Two non-word characters are also always separated, so the only way past
    // this guard is for both sides to be word characters.
    bool leftIsWord = IsWordChar(cL);
    bool rightIsWord = IsWordChar(cR);
    if (!leftIsWord || !rightIsWord)
    {
        return true;
    }

    // Characters from different scripts (e.g. Arabic/English) are separated
    // when both are letters or both are digits; a letter next to a digit is not
    // split by this rule.
    UnicodeScript leftScript = UScript(cL);
    UnicodeScript rightScript = UScript(cR);
    if (leftScript != rightScript)
    {
        UnicodeCategory leftCategory = Char.GetUnicodeCategory(cL);
        bool bothAlpha = IsPropAlpha(leftCategory) && IsPropAlpha(rightCategory);
        if (bothAlpha || (IsPropDigit(leftCategory) && IsPropDigit(rightCategory)))
        {
            return true;
        }
    }

    // The remaining rules only apply when at least one side is CJK.
    bool leftIsCjk = leftScript == UnicodeScript.CJK;
    bool rightIsCjk = rightScript == UnicodeScript.CJK;
    if (!leftIsCjk && !rightIsCjk)
    {
        return false;
    }

    bool leftIsHan = IsIdeograph(cL);
    bool rightIsHan = IsIdeograph(cR);
    if (fHanWordBreak && (leftIsHan || rightIsHan))
    {
        return true;
    }

    // Rule 4: !(Hira|Kata|Han) // Hira|Kata|Han
    // At least one side is CJK here, so a non-CJK left implies a CJK right.
    if (!leftIsCjk)
    {
        return true;
    }

    // Rule 5: Hira // !Hira
    bool rightIsHiragana = IsHiragana(cR);
    if (!rightIsHiragana && IsHiragana(cL))
    {
        return true;
    }

    // Rule 6: Kata // !(Hira|Kata)
    if (IsKatakana(cL) && !rightIsHiragana && !IsKatakana(cR))
    {
        return true;
    }

    // Rule 7: Han // !(Hira|Han)
    return leftIsHan && !rightIsHan && !rightIsHiragana;
}