in drqa/reader/predictor.py
def predict_batch(self, batch, top_n=1):
    """Predict a batch of document-question pairs."""
    documents, questions, candidates = [], [], []
    for b in batch:
        documents.append(b[0])
        questions.append(b[1])
        candidates.append(b[2] if len(b) == 3 else None)
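    # If no example supplied candidate answers, collapse the whole list to
    # None so decoding is unconstrained rather than limited to a candidate set.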
    candidates = candidates if any(candidates) else None

    # Tokenize the inputs, perhaps multi-processed.
    if self.workers:
        q_tokens = self.workers.map_async(tokenize, questions)
        d_tokens = self.workers.map_async(tokenize, documents)
        q_tokens = list(q_tokens.get())
        d_tokens = list(d_tokens.get())
    else:
        q_tokens = list(map(self.tokenizer.tokenize, questions))
        d_tokens = list(map(self.tokenizer.tokenize, documents))
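    # NOTE: the pool path maps a `tokenize` callable that is assumed to be a
    # module-level helper (so it can be pickled for the workers), while the
    # single-process path calls self.tokenizer.tokenize directly.

    # Build one feature dict per pair; the keys mirror the annotations the
    # model was trained with (words, lemmas, POS tags, named entities).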
    examples = []
    for i in range(len(questions)):
        examples.append({
            'id': i,
            'question': q_tokens[i].words(),
            'qlemma': q_tokens[i].lemmas(),
            'document': d_tokens[i].words(),
            'lemma': d_tokens[i].lemmas(),
            'pos': d_tokens[i].pos(),
            'ner': d_tokens[i].entities(),
        })

    # Stick document tokens in candidates for decoding
    if candidates:
        candidates = [{'input': d_tokens[i], 'cands': candidates[i]}
                      for i in range(len(candidates))]
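    # (When candidates are given, the decoder is expected to keep only spans
    # whose untokenized text matches one of the candidate strings.)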

    # Build the batch and run it through the model
    batch_exs = batchify([vectorize(e, self.model) for e in examples])
    s, e, score = self.model.predict(batch_exs, candidates, top_n)
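    # `batchify` / `vectorize` are assumed to be imported from the reader's
    # vectorization utilities; `s`, `e`, and `score` are per-example lists of
    # the top_n predicted start indices, end indices, and span scores.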

    # Retrieve the predicted spans
    results = []
    for i in range(len(s)):
        predictions = []
        for j in range(len(s[i])):
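            # End indices are inclusive, so slice one past the end token
            # before untokenizing back into a text span.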
            span = d_tokens[i].slice(s[i][j], e[i][j] + 1).untokenize()
            predictions.append((span, score[i][j].item()))
        results.append(predictions)
    return results
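
A minimal usage sketch, assuming a `Predictor` wrapper class in this module
(the constructor arguments shown are illustrative, not the exact API):

    predictor = Predictor(model_file, num_workers=4)
    batch = [(document_text, 'Who wrote Hamlet?')]  # (document, question) pairs
    for span, score in predictor.predict_batch(batch, top_n=3)[0]:
        print('%s (%.4f)' % (span, score))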