in Sources/Tokenizers/Decoder.swift [158:185]
/// Collapses runs of byte-fallback tokens into decoded UTF-8 text.
///
/// A byte-fallback token has the exact form `<0xHH>`, where `HH` is two
/// hexadecimal digits. Each maximal run of consecutive byte tokens is replaced
/// by a single string obtained by decoding the collected bytes as UTF-8
/// (`String(decoding:as:)` repairs invalid sequences with U+FFFD). All other
/// tokens are passed through unchanged, in their original order.
///
/// - Parameter tokens: The token strings to process.
/// - Returns: The tokens with every run of byte tokens merged into one decoded string.
func decode(tokens: [String]) -> [String] {
    var newTokens: [String] = []
    var pendingBytes: [UTF8.CodeUnit] = []

    /// Returns the byte encoded by `token` if it is exactly `<0xHH>`, otherwise `nil`.
    func parseByte(_ token: String) -> UTF8.CodeUnit? {
        guard token.count == 6, token.hasPrefix("<0x"), token.hasSuffix(">") else {
            return nil
        }
        let hexDigits = token.dropFirst(3).prefix(2)
        // Integer parsing with a radix also accepts a leading sign ("-f", "+f");
        // require two plain hex digits so a malformed token can never yield a
        // value outside 0...255.
        guard hexDigits.allSatisfy({ $0.isASCII && $0.isHexDigit }) else {
            return nil
        }
        return UTF8.CodeUnit(hexDigits, radix: 16)
    }

    /// Decodes the buffered bytes as UTF-8, appends the result, and clears the buffer.
    func flushPendingBytes() {
        guard !pendingBytes.isEmpty else { return }
        newTokens.append(String(decoding: pendingBytes, as: UTF8.self))
        pendingBytes.removeAll()
    }

    for token in tokens {
        if let byte = parseByte(token) {
            pendingBytes.append(byte)
        } else {
            flushPendingBytes()
            newTokens.append(token)
        }
    }
    // A run of byte tokens at the very end has no following token to trigger
    // the flush inside the loop, so emit it here.
    flushPendingBytes()
    return newTokens
}