def generate_joins_filter()

in dataset-construction/src/ndb_data/construction/make_questions.py [0:0]


def generate_joins_filter(hf, facts, qid, question_template, s, r, o, is_subject):
    """Yield "join" variants of a question built from a second fact about ``s``.

    The subject (``is_subject`` true) or object (``is_subject`` false)
    placeholder of the question is replaced by a modifier phrase taken from the
    module-level ``additional_subjects`` / ``additional_objects`` tables. The
    modifier's ``$AO`` marker is filled with the object of another fact in
    ``hf`` whose key starts with ``(s, additional_rel)``.

    Nothing is yielded when the relevant placeholder (``$s`` or ``$o``) is
    absent from ``question_template[0]``, when ``r`` has no entry in the
    selected table, or when ``hf`` holds no matching additional fact.

    Args:
        hf: Mapping keyed by tuples read here as ``(subject, relation,
            object, ...)``; each value is an iterable that is folded into the
            yielded ``hyps`` set. Must contain the key ``(s, r, o)``.
        facts: Unused; kept so the signature stays compatible with callers.
        qid: Question id string, used as the prefix of the generated id.
        question_template: Sequence of template strings containing ``$s``
            and/or ``$o`` placeholders.
        s: Subject id of the base fact (looked up through ``wiki``).
        r: Relation id of the base fact.
        o: Object of the base fact; resolved through ``wiki`` only when it
            starts with ``"Q"``, otherwise used verbatim.
        is_subject: Whether the subject placeholder (true) or the object
            placeholder (false) receives the modifier.

    Yields:
        Tuples ``(join_question_id, base_question_id, extended_question,
        hyps)``, where ``base_question_id`` is ``qid`` up to its first
        underscore, ``extended_question`` is the list of filled-in templates
        and ``hyps`` is the union of the values stored in ``hf`` for the
        additional fact and for ``(s, r, o)``.
    """
    source_mutations = additional_subjects if is_subject else additional_objects

    placeholder = "$s" if is_subject else "$o"
    if placeholder not in question_template[0] or r not in source_mutations:
        return

    modifiers_by_rel = source_mutations[r]

    # Index the facts about `s` by relation in a single pass over `hf`, instead
    # of rescanning every key for each candidate relation. Insertion order of
    # `hf` is kept inside each bucket so `random.choice` draws from the same
    # sequence as a per-candidate scan would.
    # NOTE(review): the additional fact is anchored on `s` even when
    # `is_subject` is false -- confirm that `o` is not the intended anchor.
    keys_by_rel = {}
    for key in hf:
        if key[0] == s:
            keys_by_rel.setdefault(key[1], []).append(key)

    joinable_rels = [rel for rel in modifiers_by_rel if rel in keys_by_rel]
    if not joinable_rels:
        return

    # Only the placeholder that does NOT receive the modifier needs an entity
    # name, and that name is the same for every yielded question, so resolve
    # it once up front rather than on every iteration.
    if is_subject:
        fixed_name = (
            wiki.get_by_id_or_uri(o)["english_name"] if o.startswith("Q") else o
        )
    else:
        fixed_name = wiki.get_by_id_or_uri(s)["english_name"]

    side = "subj" if is_subject else "obj"
    base_qid = qid.split("_")[0]

    for additional_rel in joinable_rels:
        found_sro = random.choice(keys_by_rel[additional_rel])
        joined_object = found_sro[2]

        # Values starting with "Q" are treated as entity ids and resolved to
        # their English name; anything else is used as a literal.
        if joined_object.startswith("Q"):
            additional_object_name = wiki.get_by_id_or_uri(joined_object)[
                "english_name"
            ]
        else:
            additional_object_name = joined_object

        modifier = random.choice(modifiers_by_rel[additional_rel]).replace(
            "$AO", additional_object_name
        )

        if is_subject:
            subject_text, object_text = modifier, fixed_name
        else:
            subject_text, object_text = fixed_name, modifier

        extended_question = [
            q.replace("$s", subject_text).replace("$o", object_text)
            for q in question_template
        ]

        hyps = set(hf[found_sro]).union(hf[(s, r, o)])

        yield (
            f"{qid}_join_{additional_rel}_{joined_object}_{side}",
            base_qid,
            extended_question,
            hyps,
        )