in Sources/Tokenizers/PreTokenizer.swift [302:338]
/// Splits the string around every match of `captureRegex`, keeping the separators.
///
/// The result interleaves the text found between matches with one separator per match,
/// in order of appearance. Empty stretches of text between matches (and an empty
/// leading or trailing stretch) are omitted.
///
/// For each match, the separator that is emitted is the highest-numbered capture group
/// that took part in the match; when no capture group took part (or the pattern has
/// none), the whole match is emitted. Any part of the match lying outside the emitted
/// group is dropped from the output.
///
/// - Parameter captureRegex: The regular expression whose matches delimit the pieces.
/// - Returns: The pieces and separators in order, or `[self]` when nothing matches.
func split(by captureRegex: NSRegularExpression) -> [String] {
    // Find the matching capture groups
    let selfRange = NSRange(startIndex..<endIndex, in: self)
    let matches = captureRegex.matches(in: self, options: [], range: selfRange)
    if matches.isEmpty { return [self] }

    var result: [String] = []
    var start = startIndex
    for match in matches {
        // `NSRange` bounds are UTF-16 code-unit offsets, while `index(_:offsetBy:)`
        // counts `Character`s. The two disagree as soon as the text contains emoji or
        // combining sequences, so the range has to be converted, not used as an offset.
        // A match whose range cannot be mapped onto string indices is skipped, which
        // leaves its text inside the surrounding piece instead of losing it.
        guard let matchRange = Range(match.range, in: self) else { continue }

        // Text between the previous match and this one
        if start < matchRange.lowerBound {
            result.append(String(self[start..<matchRange.lowerBound]))
        }
        start = matchRange.upperBound

        // Append separator, supporting capture groups: scan from the last group down
        // to group 0 (the whole match) and take the first one that has a valid range.
        // Groups that did not participate report `NSNotFound` and fail the conversion.
        for r in (0..<match.numberOfRanges).reversed() {
            if let sepRange = Range(match.range(at: r), in: self) {
                result.append(String(self[sepRange]))
                break
            }
        }
    }

    // Append remaining suffix
    if start < endIndex {
        result.append(String(self[start...]))
    }
    return result
}