def convert_kilt_to_fairseq()

in scripts_genre/convert_kilt_to_fairseq.py [0:0]


def convert_kilt_to_fairseq(dataset):
    """Flatten documents into parallel ``source`` / ``target`` lists.

    For each document, every distinct provenance title whose ``bleu_score``
    is greater than 0.5 (a missing score counts as 1) produces one pair:
    the value returned by ``create_input`` for the document, and the title.
    If the document has ``doc["meta"]["template_questions"]``, each template
    question is additionally paired with that same title.

    Titles are emitted in the order they are first seen within a document,
    so the result is reproducible from run to run.

    Args:
        dataset: Iterable of mappings. Each must have an ``"output"`` entry
            that iterates over mappings; any of those with a
            ``"provenance"`` key must list mappings that carry a ``"title"``.

    Returns:
        Tuple ``(source, target)`` of two equal-length lists, where
        ``source[i]`` is the input paired with the title ``target[i]``.
    """
    source = []
    target = []
    for doc in tqdm(dataset, desc="Processing"):
        # dict.fromkeys de-duplicates like a set but preserves first-seen
        # order; iterating a set of strings would depend on the hash seed.
        titles = dict.fromkeys(
            prov["title"]
            for out in doc["output"]
            if "provenance" in out
            for prov in out["provenance"]
            if prov.get("bleu_score", 1) > 0.5
        )
        if not titles:
            # Nothing to emit; skip building the input for this document.
            continue

        # Neither the input nor the template questions depend on the title,
        # so compute them once per document rather than once per title.
        doc_input = create_input(
            doc,
            max_length=384,
            start_delimiter="[START_ENT]",
            end_delimiter="[END_ENT]",
        )
        if "meta" in doc and "template_questions" in doc["meta"]:
            template_questions = doc["meta"]["template_questions"]
        else:
            template_questions = ()

        for title in titles:
            source.append(doc_input)
            target.append(title)
            for template_question in template_questions:
                source.append(template_question)
                target.append(title)

    return source, target