def generate_closed_book_format()

in python/closed_book_processing.py [0:0]


def _strip_line_breaks(text):
    """Return *text* with all line-feed and carriage-return characters removed."""
    return text.replace("\n", "").replace("\r", "")


def generate_closed_book_format(data_path, only_english, keep_markup, output_path):
    """Write question/answer pairs from a JSON-lines file as parallel text files.

    Every line of ``data_path`` is parsed as one JSON object. For each entry of
    its ``"Questions"`` list, the question text is built from the extracted
    ``"name_markup"`` (followed by a single space) and the extracted
    ``"text_markup"``, whichever are present. Each entry of the question's
    ``"Answers"`` list that yields non-empty text produces one pair. Pairs are
    written one per line, with line breaks inside the text removed, so that
    line *i* of ``<output_path>.source`` is the question belonging to the
    answer on line *i* of ``<output_path>.target``.

    Args:
        data_path: Path of the input file containing one JSON object per line.
        only_english: When truthy, records whose ``"Fasttext_language"`` is
            not ``"en"`` are skipped.
        keep_markup: Forwarded unchanged as the second argument of
            ``extract_text``.
        output_path: Output prefix; ``".source"`` and ``".target"`` are
            appended to it.

    Raises:
        KeyError: If a record has no ``"Questions"`` key (or no
            ``"Fasttext_language"`` key while ``only_english`` is set), or a
            question with non-empty text has no ``"Answers"`` key.
        json.JSONDecodeError: If an input line is not valid JSON.
    """
    source_lines = []
    target_lines = []

    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with open(data_path, "r", encoding="utf-8") as f:
        for raw_record in f:
            record = json.loads(raw_record)
            if only_english and record["Fasttext_language"] != "en":
                continue

            for question in record["Questions"]:
                question_text = ""
                if "name_markup" in question:
                    title = extract_text(question["name_markup"], keep_markup)
                    if title is not None:
                        question_text += title + " "
                if "text_markup" in question:
                    body = extract_text(question["text_markup"], keep_markup)
                    if body is not None:
                        question_text += body
                if not question_text:
                    continue

                source_line = _strip_line_breaks(question_text)
                for answer in question["Answers"]:
                    # An answer without markup has no text of its own. Skip it
                    # instead of falling through with the text of the previous
                    # answer (or with an unbound variable).
                    if "text_markup" not in answer:
                        continue
                    answer_text = extract_text(answer["text_markup"], keep_markup)
                    if answer_text is None:
                        continue
                    target_line = _strip_line_breaks(answer_text)
                    if not target_line:
                        continue
                    source_lines.append(source_line)
                    target_lines.append(target_line)

    # The outputs are opened only after the whole input was parsed, so a
    # malformed input never leaves truncated or half-written output files.
    with open(output_path + ".source", "w", encoding="utf-8") as f:
        f.writelines(line + "\n" for line in source_lines)
    with open(output_path + ".target", "w", encoding="utf-8") as f:
        f.writelines(line + "\n" for line in target_lines)