def generate_derivations()

in dataset-construction/src/ndb_data/construction/make_questions.py [0:0]


def generate_derivations(hypotheses_facts, facts):
    """Generate question derivations for every templated triple.

    For each ``(s, r, o)`` key of ``hypotheses_facts`` whose relation ``r``
    has an entry in the module-level ``final_templates``, one template is
    sampled per question type and instantiated with the subject and object
    names. Join derivations (via ``generate_joins``) and, for ``"bool"``
    questions, a negatively sampled counterpart (via
    ``generate_negative_bool``) are added as well.

    Relies on names defined elsewhere in the module: ``tqdm``,
    ``final_templates``, ``wiki``, ``random``, ``generate_joins`` and
    ``generate_negative_bool``.

    Args:
        hypotheses_facts: Mapping keyed by ``(s, r, o)`` triples. The value
            stored under each key is passed through unchanged as the last
            element of the derivation tuple built here.
        facts: Forwarded untouched to ``generate_joins``.

    Returns:
        A list containing, per generated question, the tuple
        ``(qid, q_type, instantiated_templates, hypotheses_facts[hyp])``,
        followed by whatever ``generate_joins`` yields and any non-``None``
        result of ``generate_negative_bool``.

    Note:
        Generation is best-effort: a failure while handling one question
        type skips the remainder of that question type only. Such failures
        are logged at DEBUG level (with traceback) and summarised in a
        single WARNING instead of being dropped silently. Lookups of the
        subject/object name happen outside that guard and still propagate.
    """
    # Function-scope import so the block does not depend on the file's
    # import section already providing ``logging``.
    import logging

    logger = logging.getLogger(__name__)

    derivations = []
    skipped = 0

    # For each S,R,O triple in the DB, generate a question/answer derivation
    for hyp in tqdm(hypotheses_facts):
        s, r, o = hyp

        # Skip relations that we do not have templates for
        if r not in final_templates:
            continue

        # Get the canonical subject/object name. Objects that do not start
        # with "Q" are used verbatim (presumably literals rather than entity
        # identifiers -- TODO confirm against the data source).
        subject_name = wiki.get_by_id_or_uri(s)["english_name"]
        object_name = (
            o if not o.startswith("Q") else wiki.get_by_id_or_uri(o)["english_name"]
        )

        # Every template key except these three is treated as a question type.
        # NOTE: this is a set of strings, so its iteration order (and hence
        # the order in which ``random`` is consumed) can differ between
        # interpreter runs unless string hashing is fixed.
        question_types = set(final_templates[r]).difference(
            {"fact", "_subject", "_object"}
        )

        # For all question types (bool, set, min, max etc)
        for q_type in question_types:
            try:
                # Sample a question template
                question = random.choice(final_templates[r][q_type])

                # Swap the subject_name and object_name into the template
                out = [
                    q.replace("$s", subject_name).replace("$o", object_name)
                    for q in question
                ]

                # Make the question ID: only the placeholders that actually
                # occur in the first template string contribute to it.
                subj_in_q = f"_{s}" if "$s" in question[0] else ""
                obj_in_q = f"_{o}" if "$o" in question[0] else ""

                # The second template string is read before the relation
                # check so a too-short template is rejected for every
                # relation, exactly as before.
                key_source = question[1]
                if "$" in key_source:
                    s_key = key_source.split("[SEP]")[0].strip()
                    sort_key = f"_{s_key}"
                else:
                    sort_key = ""
                if r == "P47":
                    # P47 always gets a fixed suffix (presumably because the
                    # relation is symmetric -- TODO confirm).
                    sort_key = "_$both"

                qid = f"{q_type}_{r}{subj_in_q}{obj_in_q}{sort_key}"

                # Add this to the derivations
                derivations.append((qid, q_type, out, hypotheses_facts[hyp]))

                # Materialise the joins first so a failure part-way through
                # adds none of them.
                gj = list(
                    generate_joins(hypotheses_facts, facts, qid, question, s, r, o)
                )
                derivations.extend(gj)

                # If the question is boolean generate a negatively sampled
                # false one for it
                if q_type == "bool":
                    generated = generate_negative_bool(hypotheses_facts, question, hyp)
                    if generated is not None:
                        derivations.append(generated)
            except Exception:
                # Deliberate best-effort: keep going, but leave a trace so
                # systematic failures are discoverable.
                skipped += 1
                logger.debug(
                    "Skipping question type %r for triple %r",
                    q_type,
                    hyp,
                    exc_info=True,
                )
                continue

    if skipped:
        logger.warning(
            "generate_derivations: %d question type(s) failed and were skipped "
            "(enable DEBUG logging for tracebacks)",
            skipped,
        )

    return derivations