func tokenizer()

in QuestionAnswering/QuestionAnswering/QuestionAnswering.swift [45:67]


    /// Encodes a question/text pair into a fixed-length sequence of token ids.
    ///
    /// The result has `MODEL_INPUT_LENGTH` entries laid out as
    /// `CLS, question tokens, SEP, text tokens, SEP, PAD...`. The text is
    /// truncated to whatever room remains after the question and the special
    /// tokens; the question is never truncated.
    ///
    /// - Parameters:
    ///   - question: The question string, tokenized with `wordPieceTokenizer`.
    ///   - text: The context string, tokenized with `wordPieceTokenizer` and
    ///     truncated if it does not fit.
    /// - Returns: An array of `MODEL_INPUT_LENGTH` ids. Entries are optional
    ///   because they come straight from `token2id` lookups and the tokenizer
    ///   output.
    /// - Throws: `TokenizationError.Question_Too_Long` when the question leaves
    ///   no room for the special tokens.
    func tokenizer(question: String, text: String) throws -> [Int?] {
        let tokenIdsQuestion = wordPieceTokenizer(question)

        // Slots left for text once the question and the special tokens are placed.
        // NOTE(review): assumes EXTRA_ID_NUM covers CLS and both SEP tokens (i.e. >= 3) — confirm.
        let textBudget = MODEL_INPUT_LENGTH - tokenIdsQuestion.count - EXTRA_ID_NUM

        // The original check only compared against MODEL_INPUT_LENGTH, so a question
        // that fit but left a negative text budget crashed below on `0..<negative`
        // (or on an out-of-bounds SEP write) instead of reporting an error.
        if tokenIdsQuestion.count >= MODEL_INPUT_LENGTH || textBudget < 0 {
            throw TokenizationError.Question_Too_Long
        }

        let tokenIdsText = wordPieceTokenizer(text)
        let pad = token2id[PAD]
        var ids = Array(repeating: pad, count: MODEL_INPUT_LENGTH)

        // Index 0 is CLS, the question follows, then the first SEP.
        ids[0] = token2id[CLS]
        for (i, tokenid) in tokenIdsQuestion.enumerated() {
            ids[i + 1] = tokenid
        }
        let textStart = tokenIdsQuestion.count + 2
        ids[textStart - 1] = token2id[SEP]

        // Copy as much of the text as fits, then close with the second SEP.
        let maxTextLength = min(tokenIdsText.count, textBudget)
        for i in 0..<maxTextLength {
            ids[textStart + i] = tokenIdsText[i]
        }
        ids[textStart + maxTextLength] = token2id[SEP]

        return ids
    }