def combine_generated_files()

in paq/generation/generate_qa_pairs.py [0:0]


def combine_generated_files(document_ranker_file,
                            question_generation_file,
                            output_file
                            ):
    """Merge generated QA pairs with passage scores and write them out.

    Reads passage scores from ``document_ranker_file``, attaches them to the
    generated QA pairs loaded from ``question_generation_file``, groups the
    pairs by question text, and writes one record per distinct question to
    ``output_file`` via ``dump_jsonl``.

    Args:
        document_ranker_file: Path to a JSON-lines file. Each record must have
            a ``"passage_id"`` key and a ``"metadata"`` dict; the dict's
            optional ``"ps_score"`` entry is used as the passage score.
        question_generation_file: Path passed to ``load_jsonl``. Each record
            must have ``"question"``, ``"answer"``, ``"passage_id"`` and
            ``"metadata"`` keys.
        output_file: Destination passed to ``dump_jsonl``. Each written record
            has the keys ``"question"``, ``"answer"`` (all answers generated
            for that question) and ``"metadata"`` (one dict per answer, in the
            same order).

    Raises:
        KeyError: If a record lacks a required key, or a generated QA pair
            refers to a ``passage_id`` that is absent from the ranker file.
    """

    def _get_passage_score_map(doc_ranker_file):
        # Build {passage_id: ps_score}; the score is None when the ranker
        # metadata carries no "ps_score" entry.
        passage_scores = {}
        with open(doc_ranker_file, "r", encoding="utf-8") as f:
            # Iterate the handle directly so the file is streamed instead of
            # being fully materialised by readlines().
            for line in f:
                if not line.strip():
                    # Tolerate blank lines (e.g. a trailing empty line), which
                    # json.loads would otherwise reject.
                    continue
                row = json.loads(line)
                passage_scores[row["passage_id"]] = row["metadata"].get("ps_score")
        return passage_scores

    def _add_passage_metadata(questions_fi, passage_scores):
        # Group (answer, metadata) pairs under their question text.
        qas_dict = defaultdict(list)
        for qas in load_jsonl(questions_fi):
            question, answer, passage_id = qas["question"], qas["answer"], qas["passage_id"]
            metadata = {"passage_id": passage_id, "ps_score": passage_scores[passage_id], 'answer': answer}
            # Generation metadata is applied last, so any key it shares with
            # the base entries above takes precedence.
            metadata.update(qas["metadata"])
            qas_dict[question].append((answer, metadata))
        return qas_dict

    def _get_output_format(qas_dict):
        # Turn the grouped pairs into one output record per question, with
        # parallel sequences of answers and their metadata.
        final_qas = []
        for question, answers_meta in qas_dict.items():
            answers, metadata_list = zip(*answers_meta)
            final_qas.append({"question": question, "answer": answers, "metadata": metadata_list})
        return final_qas

    passage_score_map = _get_passage_score_map(document_ranker_file)
    qas_with_meta = _add_passage_metadata(question_generation_file, passage_score_map)
    final_qas = _get_output_format(qas_with_meta)
    dump_jsonl(final_qas, output_file)