in paq/generation/question_generator/generator.py [0:0]
def _batch_iterator(context_answer_pairs,
batch_size,
include_title: bool = True,
):
def _answer_context_pair_2_text(answer, context):
answer_start, answer_end, answer_text = answer["start"], answer['end'], answer['text']
return context[:answer_start] + "** " + context[answer_start:answer_end] + " **" + context[answer_end:]
def _create_input_text(context, answer, title=None) -> str:
text = _answer_context_pair_2_text(answer, context)
if title is not None:
output = f"answer: {answer['text']} | title: {title} | context: {text}"
else:
output = f"answer: {answer['text']} | context: {text}"
return output
iter_batch = []
for context_answer_pair in context_answer_pairs:
passage_id = context_answer_pair["passage_id"]
context = context_answer_pair["passage"]
answers = context_answer_pair["answers"]
title = context_answer_pair["metadata"]["title"] if include_title else None
for answer in answers:
input_text = _create_input_text(context, answer, title)
iter_batch.append((passage_id, answer, input_text))
if len(iter_batch) >= batch_size:
yield iter_batch
iter_batch = []
if len(iter_batch) > 0:
yield iter_batch