func split()

in Sources/Tokenizers/PreTokenizer.swift [302:338]


    /// Splits the string around every match of `captureRegex`, keeping the separators in the output.
    ///
    /// The result interleaves the non-empty text found between matches with one separator entry
    /// per match. The separator entry is the highest-numbered capture group that took part in
    /// the match, falling back to the whole match (group 0) when no capture group did.
    ///
    /// - Parameter captureRegex: The regular expression whose matches act as separators.
    /// - Returns: The pieces of the string in their original order, or `[self]` when the
    ///   expression does not match anywhere.
    func split(by captureRegex: NSRegularExpression) -> [String] {
        // Find the matching capture groups
        let selfRange = NSRange(startIndex..<endIndex, in: self)
        let matches = captureRegex.matches(in: self, options: [], range: selfRange)

        if matches.isEmpty { return [self] }

        // `NSRange` bounds count UTF-16 code units, whereas `String.index(_:offsetBy:)` counts
        // `Character`s. The offsets therefore have to be resolved through the UTF-16 view,
        // otherwise any emoji or combining sequence before a match shifts the cut too far right.
        // Out-of-range offsets are clamped to `endIndex`.
        func clampedIndex(utf16Offset: Int) -> String.Index {
            return utf16.index(startIndex, offsetBy: utf16Offset, limitedBy: endIndex) ?? endIndex
        }

        var result: [String] = []
        var start = startIndex
        for match in matches {
            // Emit the text between the previous match and this one, skipping empty gaps
            let prefixEnd = clampedIndex(utf16Offset: match.range.lowerBound)
            if start < prefixEnd {
                result.append(String(self[start..<prefixEnd]))
            }

            // Resume scanning right after the whole match
            start = clampedIndex(utf16Offset: match.range.upperBound)

            // Append separator, supporting capture groups: walk the groups from last to first
            // and keep the first one that resolves to a range (groups that did not take part
            // in the match do not).
            for r in (0..<match.numberOfRanges).reversed() {
                let matchRange = match.range(at: r)
                if let sepRange = Range(matchRange, in: self) {
                    result.append(String(self[sepRange]))
                    break
                }
            }
        }

        // Append remaining suffix
        if start < endIndex {
            result.append(String(self[start...]))
        }

        return result
    }