def _batch_iterator()

in paq/generation/question_generator/generator.py [0:0]


def _batch_iterator(context_answer_pairs,
                    batch_size,
                    include_title: bool = True,
                    ):

    def _answer_context_pair_2_text(answer, context):
        answer_start, answer_end, answer_text = answer["start"], answer['end'], answer['text']
        return context[:answer_start] + "** " + context[answer_start:answer_end] + " **" + context[answer_end:]

    def _create_input_text(context, answer, title=None) -> str:
        text = _answer_context_pair_2_text(answer, context)

        if title is not None:
            output = f"answer: {answer['text']} | title: {title} | context: {text}"
        else:
            output = f"answer: {answer['text']} | context: {text}"
        return output

    iter_batch = []
    for context_answer_pair in context_answer_pairs:

        passage_id = context_answer_pair["passage_id"]
        context = context_answer_pair["passage"]
        answers = context_answer_pair["answers"]
        title = context_answer_pair["metadata"]["title"] if include_title else None

        for answer in answers:
            input_text = _create_input_text(context, answer, title)
            iter_batch.append((passage_id, answer, input_text))

            if len(iter_batch) >= batch_size:
                yield iter_batch
                iter_batch = []

    if len(iter_batch) > 0:
        yield iter_batch