in paq/rerankers/rerank.py [0:0]
def tokenize(tokenizer, batch_qas, cuda, top_k):
    # Pair each input question with its top_k retrieved QA candidates for cross-encoder scoring.
    input_as, input_bs = [], []
    for item in batch_qas:
        question_a = item['input_qa']['question'] + '?'
        # Candidate strings: retrieved question + '?' followed by its first answer.
        question_bs = [q['question'] + '? ' + q['answer'][0] for q in item['retrieved_qas']]
        question_bs = question_bs[:top_k]
        # Repeat the input question once per candidate so the two lists stay aligned.
        input_as += [question_a for _ in range(len(question_bs))]
        input_bs += question_bs

    # Encode all (question, candidate) pairs in one batch, padded to the longest sequence.
    inputs = tokenizer.batch_encode_plus(
        list(zip(input_as, input_bs)), return_tensors='pt', padding='longest', add_special_tokens=True
    )
    # Reshape the flat (batch * top_k, seq_len) tensors to (batch, top_k, seq_len).
    inputs = {k: v.reshape(len(batch_qas), v.shape[0] // len(batch_qas), -1) for k, v in inputs.items()}
    return {k: v.cuda() for k, v in inputs.items()} if cuda else inputs
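
A minimal usage sketch of the expected input format follows. The example batch entry and the choice of HuggingFace tokenizer are assumptions for illustration, not taken from the repository; the real batch format comes from the reranker's data loading code.

# Usage sketch (assumed inputs; tokenizer choice is illustrative).
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained('albert-base-v2')  # assumed model name
batch_qas = [{
    'input_qa': {'question': 'who wrote hamlet'},
    'retrieved_qas': [
        {'question': 'who is the author of hamlet', 'answer': ['William Shakespeare']},
        {'question': 'when was hamlet written', 'answer': ['around 1600']},
    ],
}]
inputs = tokenize(tokenizer, batch_qas, cuda=False, top_k=2)
# inputs['input_ids'] has shape (1, 2, seq_len): one example, two question-candidate pairs.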