in src/cs/Microsoft.PhoneticMatching/Nlp/Tokenizer/SplittingTokenizer.cs [30:59]
public IList<Token> Tokenize(string query)
{
    // Splits the query around every match of this.pattern: each match is treated as a
    // separator and skipped, and the text found between separators becomes a token.
    // NOTE(review): assumes Interval(first, last).Length == last - first, so that the
    // cursor lands right after the separator once a token is emitted - confirm against Interval.
    var tokens = new List<Token>();
    int cursor = 0;

    foreach (Match separator in this.pattern.Matches(query))
    {
        if (separator.Index < cursor)
        {
            // A separator that starts before the cursor is ignored and the cursor stays put.
            continue;
        }

        if (separator.Index > cursor)
        {
            // Text sits between the cursor and this separator: emit it as a token.
            var span = new Interval(cursor, separator.Index);
            tokens.Add(new Token(query.Substring(span.First, span.Length), span));
            cursor += span.Length;
        }

        // Step over the separator itself.
        cursor += separator.Length;
    }

    // Whatever follows the last separator becomes the final token.
    if (cursor < query.Length)
    {
        var tail = new Interval(cursor, query.Length);
        tokens.Add(new Token(query.Substring(tail.First, tail.Length), tail));
    }

    return tokens;
}