in paq/generation/generate_qa_pairs.py [0:0]
def combine_generated_files(document_ranker_file,
                            question_generation_file,
                            output_file
                            ):
    """Merge passage-ranker scores into generated QA pairs and write them out.

    Reads the passage-ranker JSONL file to build a ``passage_id -> ps_score``
    lookup, attaches that score to every generated QA pair, groups the pairs
    by question text and dumps one record per distinct question to
    ``output_file`` via ``dump_jsonl``.

    Each output record has the form::

        {"question": <str>, "answer": (<answer>, ...), "metadata": (<dict>, ...)}

    where ``answer[i]`` and ``metadata[i]`` belong together.

    Args:
        document_ranker_file: Path to a JSONL file whose rows carry
            ``"passage_id"`` and a ``"metadata"`` dict (optionally holding
            ``"ps_score"``).
        question_generation_file: Path to a JSONL file (read with
            ``load_jsonl``) whose rows carry ``"question"``, ``"answer"``,
            ``"passage_id"`` and ``"metadata"``.
        output_file: Destination path handed to ``dump_jsonl``.

    Raises:
        KeyError: If a generated QA pair references a ``passage_id`` that is
            not present in the ranker file, or a required key is missing
            from a row.
    """

    def _get_passage_score_map(doc_ranker_file):
        """Return a dict mapping each passage id to its ``ps_score`` (or None)."""
        passage_scores = {}
        # JSON text is ASCII/UTF-8; pin the encoding so decoding does not
        # depend on the platform locale.
        with open(doc_ranker_file, "r", encoding="utf-8") as f:
            # Iterate the handle directly so the file is streamed line by
            # line rather than materialised in memory by readlines().
            for line in f:
                if not line.strip():
                    # Tolerate blank lines (e.g. a trailing newline at EOF).
                    continue
                row = json.loads(line)
                passage_scores[row["passage_id"]] = row["metadata"].get("ps_score")
        return passage_scores

    def _add_passage_metadata(questions_fi, passage_scores):
        """Group generated QA pairs by question, attaching passage metadata."""
        qas_dict = defaultdict(list)
        for qas in load_jsonl(questions_fi):
            question = qas["question"]
            answer = qas["answer"]
            passage_id = qas["passage_id"]
            metadata = {
                "passage_id": passage_id,
                "ps_score": passage_scores[passage_id],
                "answer": answer,
            }
            # Generator-side metadata is applied last, so on a key clash it
            # overrides the three defaults set above.
            metadata.update(qas["metadata"])
            qas_dict[question].append((answer, metadata))
        return qas_dict

    def _get_output_format(qas_dict):
        """Turn the per-question groups into the final list of output records."""
        final_qas = []
        for question, answers_meta in qas_dict.items():
            # Every group holds at least one entry (it is created on first
            # append), so the unzip always yields exactly two tuples.
            answers, metadata_list = zip(*answers_meta)
            final_qas.append(
                {"question": question, "answer": answers, "metadata": metadata_list}
            )
        return final_qas

    passage_score_map = _get_passage_score_map(document_ranker_file)
    qas_with_meta = _add_passage_metadata(question_generation_file, passage_score_map)
    final_qas = _get_output_format(qas_with_meta)
    dump_jsonl(final_qas, output_file)