in Runtime/Tokenizers/Normalizers/Normalizers.cs [96:120]
/// <summary>
/// Returns a copy of <paramref name="text"/> in which every character that
/// <c>IsChineseChar</c> accepts is surrounded by a single space on each side.
/// All other characters are copied through unchanged.
/// </summary>
/// <param name="text">The text to process.</param>
/// <returns>The text with a space inserted before and after each matching character.</returns>
/// <remarks>
/// The input is walked by Unicode code point rather than by UTF-16 code unit, so
/// characters outside the Basic Multilingual Plane (stored as surrogate pairs) are
/// classified as a whole and never split by the inserted spaces.
/// </remarks>
public string TokenizeChineseChars(string text)
{
    StringBuilder output = new StringBuilder(text.Length);
    for (int i = 0; i < text.Length; i++)
    {
        char character = text[i];

        // A valid surrogate pair encodes one code point in two chars: decode it
        // from the original string and keep both halves together in the output.
        if (char.IsSurrogatePair(text, i))
        {
            int pairCodePoint = char.ConvertToUtf32(text, i);
            bool padPair = IsChineseChar(pairCodePoint);
            if (padPair)
            {
                output.Append(' ');
            }
            output.Append(character);
            output.Append(text[i + 1]);
            if (padPair)
            {
                output.Append(' ');
            }
            i++; // The low surrogate has been consumed as part of this code point.
            continue;
        }

        // An unpaired surrogate is not a valid code point; ConvertToUtf32 would
        // throw on it, so copy it through untouched instead of classifying it.
        if (char.IsSurrogate(character))
        {
            output.Append(character);
            continue;
        }

        // For a non-surrogate char the UTF-16 code unit value is the code point.
        int unicodeCodePoint = character;
        if (IsChineseChar(unicodeCodePoint))
        {
            output.Append(' ');
            output.Append(character);
            output.Append(' ');
        }
        else
        {
            output.Append(character);
        }
    }
    return output.ToString();
}