# tokenize() — extracted from paq/rerankers/rerank.py


def tokenize(tokenizer, batch_qas, cuda, top_k):
    """Build and tokenize cross-encoder input pairs for reranking.

    Each query question is paired with the text of each of its top-k
    retrieved QA candidates ("question? answer"), the pairs are batch
    tokenized with padding, and the resulting tensors are reshaped to
    (batch, candidates, seq_len).

    Args:
        tokenizer: HuggingFace-style tokenizer exposing ``batch_encode_plus``.
        batch_qas: list of dicts, each with 'input_qa' (the query, holding a
            'question' string) and 'retrieved_qas' (candidate dicts with a
            'question' string and an 'answer' list).
        cuda: when truthy, move the output tensors to GPU via ``.cuda()``.
        top_k: cap on the number of retrieved candidates used per query.

    Returns:
        Dict mapping tokenizer output names to tensors of shape
        (len(batch_qas), candidates_per_item, seq_len).

    NOTE(review): the final reshape assumes every batch item contributes the
    same number of candidate pairs — verify upstream guarantees this.
    """
    left_texts = []
    right_texts = []
    for qa in batch_qas:
        query = qa['input_qa']['question'] + '?'
        # Candidate text is the retrieved question plus its first answer.
        candidates = [
            r['question'] + '? ' + r['answer'][0]
            for r in qa['retrieved_qas'][:top_k]
        ]
        left_texts.extend(query for _ in candidates)
        right_texts.extend(candidates)

    encoded = tokenizer.batch_encode_plus(
        list(zip(left_texts, right_texts)),
        return_tensors='pt',
        padding='longest',
        add_special_tokens=True,
    )
    # Regroup the flat (pairs, seq) tensors per batch item.
    n_items = len(batch_qas)
    encoded = {
        key: t.reshape(n_items, t.shape[0] // n_items, -1)
        for key, t in encoded.items()
    }
    if cuda:
        encoded = {key: t.cuda() for key, t in encoded.items()}
    return encoded