in Sources/Tokenizers/BertTokenizer.swift [122:140]
/// Reassembles WordPiece sub-tokens into a single space-separated string of words.
///
/// A token that begins with `##` is treated as a continuation piece: the two-character
/// marker is removed and the remainder is appended to the word currently being built.
/// Any other token starts a new word. Empty words are never emitted, so empty tokens
/// do not produce stray separators in the result.
///
/// - Parameter wordpieceTokenList: WordPiece tokens in their original order.
/// - Returns: The reassembled words joined by single spaces, or an empty string when
///   no non-empty word could be formed (including for empty input).
func convertWordpieceToBasicTokenList(_ wordpieceTokenList: [String]) -> String {
    var words: [String] = []
    var currentWord = ""

    for token in wordpieceTokenList {
        if token.starts(with: "##") {
            // Continuation piece: drop the "##" marker and extend the word in progress.
            currentWord.append(contentsOf: token.dropFirst(2))
        } else {
            // A new word begins; flush the previous one unless nothing was accumulated.
            if !currentWord.isEmpty {
                words.append(currentWord)
            }
            currentWord = token
        }
    }

    // Flush the last word under the same non-empty rule used inside the loop, so a
    // trailing empty token does not leave a dangling separator at the end.
    if !currentWord.isEmpty {
        words.append(currentWord)
    }

    return words.joined(separator: " ")
}