in Sources/Tokenizers/Decoder.swift [111:139]
/// Decodes byte-level tokens back into text, passing added tokens through untouched.
///
/// Consecutive tokens that are not in `addedTokens` are collected into a run, translated
/// back to bytes through `byteDecoder`, and decoded as UTF-8. Tokens found in
/// `addedTokens` are emitted verbatim as their own element and end the current run.
///
/// - Parameter tokens: The token strings to decode, in order.
/// - Returns: One string per decoded run and one per added token, in input order.
///   An empty input yields an empty array.
func decode(tokens: [String]) -> [String] {
    var subTexts: [String] = []
    var currentSubText: [String] = []

    // Translates a single token to bytes via `byteDecoder`. If any character of the token
    // has no entry in the table, the token is not byte-level encoded text, so its own
    // UTF-8 bytes are used instead of crashing on a force-unwrap.
    func bytes(for token: String) -> [UInt8] {
        var mapped: [UInt8] = []
        for character in token {
            guard let byte = byteDecoder[String(character)] else {
                return Array(token.utf8)
            }
            mapped.append(byte)
        }
        return mapped
    }

    // Concatenates the bytes of every token in the run before decoding, because a single
    // multi-byte UTF-8 sequence may be split across adjacent tokens.
    func convertTokensToString(_ tokens: [String]) -> String {
        let utfCodepoints = tokens.flatMap { bytes(for: $0) }
        // `String(decoding:as:)` repairs ill-formed UTF-8 with U+FFFD rather than failing.
        return String(decoding: utfCodepoints, as: UTF8.self)
    }

    for token in tokens {
        if addedTokens.contains(token) {
            // Flush the pending run so the added token keeps its position in the output.
            if !currentSubText.isEmpty {
                subTexts.append(convertTokensToString(currentSubText))
                currentSubText = []
            }
            subTexts.append(token)
        } else {
            currentSubText.append(token)
        }
    }

    // Flush whatever run is still pending after the last token.
    if !currentSubText.isEmpty {
        subTexts.append(convertTokensToString(currentSubText))
    }
    return subTexts
}