in QuestionAnswering/QuestionAnswering/QuestionAnswering.swift [45:67]
/// Builds the fixed-length id sequence `[CLS] question [SEP] text [SEP]`, padded with `[PAD]`
/// up to `MODEL_INPUT_LENGTH`.
///
/// The question is always kept whole; the text is truncated to whatever room remains.
///
/// - Parameters:
///   - question: The question, tokenized with `wordPieceTokenizer`.
///   - text: The passage, tokenized with `wordPieceTokenizer` and placed after the question.
/// - Returns: An array of exactly `MODEL_INPUT_LENGTH` entries. Entries are optional, matching
///   the `[Int?]` return type; presumably `nil` marks a token missing from `token2id` — confirm.
/// - Throws: `TokenizationError.Question_Too_Long` when the question tokens do not leave room
///   for the `EXTRA_ID_NUM` reserved slots inside `MODEL_INPUT_LENGTH`.
func tokenizer(question: String, text: String) throws -> [Int?] {
    let tokenIdsQuestion = wordPieceTokenizer(question)

    // Room left for text once the question and the reserved slots are accounted for.
    // NOTE(review): EXTRA_ID_NUM is assumed to cover the three special tokens written below
    // ([CLS] and two [SEP]) — confirm against its declaration.
    let textCapacity = MODEL_INPUT_LENGTH - tokenIdsQuestion.count - EXTRA_ID_NUM

    // Checking only `count >= MODEL_INPUT_LENGTH` let through questions that fit the buffer on
    // their own but not together with the special tokens; those then trapped on a negative
    // `0..<maxTextLength` range (or an out-of-bounds [SEP] write) instead of throwing.
    guard tokenIdsQuestion.count < MODEL_INPUT_LENGTH, textCapacity >= 0 else {
        throw TokenizationError.Question_Too_Long
    }

    let tokenIdsText = wordPieceTokenizer(text)

    // Start fully padded so every slot not overwritten below is already [PAD].
    let pad = token2id[PAD]
    var ids = Array(repeating: pad, count: MODEL_INPUT_LENGTH)

    // [CLS] question [SEP]
    ids[0] = token2id[CLS]
    for (offset, tokenID) in tokenIdsQuestion.enumerated() {
        ids[offset + 1] = tokenID
    }
    ids[tokenIdsQuestion.count + 1] = token2id[SEP]

    // text [SEP], truncating the text to the remaining capacity.
    let textStart = tokenIdsQuestion.count + 2
    let maxTextLength = min(tokenIdsText.count, textCapacity)
    for (offset, tokenID) in tokenIdsText.prefix(maxTextLength).enumerated() {
        ids[textStart + offset] = tokenID
    }
    ids[textStart + maxTextLength] = token2id[SEP]

    return ids
}