def generate_joins_extra()

in dataset-construction/src/ndb_data/construction/make_questions.py [0:0]


def generate_joins_extra(hf, facts, qid, question_template, s, r, o, is_subject):
    """Yield argmin/argmax questions extended with an extra relation of ``s``.

    For every relation listed under ``r`` in the module-level mutation table
    (``extra_subjects`` when ``is_subject`` is true, ``extra_objects``
    otherwise) for which ``hf`` holds at least one fact about ``s``, a new
    question is built by combining ``question_template`` with a randomly
    chosen modifier for that relation.

    Nothing is yielded unless ``r`` is in the mutation table, ``qid`` contains
    ``"argmin"`` or ``"argmax"``, and the answer template starts with ``"$s"``.

    Args:
        hf: Mapping whose keys are indexable as ``(subject, relation, object,
            ...)`` and whose values are iterables; it must contain the key
            ``(s, r, o)``. The values are merged into the yielded set.
        facts: Unused; kept so existing callers keep working.
        qid: Question id string. The text before its first ``"_"`` is yielded
            as the second tuple element.
        question_template: Indexable pair: ``[0]`` is the question text,
            ``[1]`` the answer template with ``$s`` / ``$o`` placeholders.
        s: Subject id; always resolved to a name through ``wiki``.
        r: Relation id, used to select the mutations to apply.
        o: Object value; resolved through ``wiki`` only if it starts with
            ``"Q"``, otherwise used verbatim.
        is_subject: Selects the mutation table and the ``subj`` / ``obj``
            suffix of the generated id.

    Yields:
        ``(new_qid, qid_prefix, (question, answer), hyps)`` where ``answer``
        is the filled answer template followed by ``" [SEP] "`` and the
        ``" [LIST] "``-joined names of the extra relation's objects, and
        ``hyps`` is a fresh set combining ``hf[(s, r, o)]`` with the values
        of every extra fact used.

    Note:
        If the question text contains none of the recognised openings
        ("Which", "What is the", "Who is", "Who has"), a message is printed
        and the generator stops without yielding anything further.
    """
    source_mutations = extra_subjects if is_subject else extra_objects

    if not (
        r in source_mutations
        and ("argmin" in qid or "argmax" in qid)
        and question_template[1].startswith("$s")
    ):
        return

    # Group the facts about `s` by relation in a single pass over `hf`, instead
    # of rescanning every key once per candidate relation. Insertion order of
    # `hf` is preserved inside each group.
    sros_by_rel = {}
    for key in hf:
        if key[0] == s:
            sros_by_rel.setdefault(key[1], []).append(key)

    matches = [
        (extra_rel, modifiers, sros_by_rel[extra_rel])
        for extra_rel, modifiers in source_mutations[r].items()
        if extra_rel in sros_by_rel
    ]
    if not matches:
        return

    def display_name(value):
        # Values starting with "Q" are looked up via `wiki`; anything else is
        # used as-is.
        if value.startswith("Q"):
            return wiki.get_by_id_or_uri(value)["english_name"]
        return value

    # Everything below up to the loop is independent of the extra relation, so
    # it is computed once rather than on every iteration.
    subject_name = wiki.get_by_id_or_uri(s)["english_name"]
    object_name = display_name(o)
    answer_prefix = (
        question_template[1].replace("$s", subject_name).replace("$o", object_name)
        + " [SEP] "
    )

    # Mark the question opening with a placeholder the modifier can replace.
    stripped_template = question_template[0]
    for opening in ("Which", "What is the", "Who is", "Who has"):
        stripped_template = stripped_template.replace(opening, "$XXT")
    bare_question = stripped_template.replace("$XXT", "").replace("?", "")

    suffix = "subj" if is_subject else "obj"
    qid_prefix = qid.split("_")[0]

    for additional_rel, modifiers, found_sros in matches:
        additional_subj_name = " [LIST] ".join(
            display_name(sro[2]) for sro in found_sros
        )
        modifier = random.choice(modifiers)

        # Checked only after a modifier has been drawn, as before, so a
        # rejected template still advances the `random` state by one draw and
        # seeded runs keep producing the same stream.
        if "$XXT" not in stripped_template:
            print("Unable to template ", question_template[0])
            return

        if "$X" in modifier:
            # The modifier wraps the original question (minus opening and "?").
            newq = modifier.replace("$X", bare_question)
        else:
            # The modifier replaces the question opening.
            newq = stripped_template.replace("$XXT", modifier)

        extended_question = (newq, answer_prefix + additional_subj_name)

        # A new set per yield so consumers can mutate it safely.
        hyps = set()
        hyps.update(hf[(s, r, o)])
        for sro in found_sros:
            hyps.update(hf[sro])

        yield (
            f"{qid}_join_extra_{additional_rel}_{suffix}",
            qid_prefix,
            extended_question,
            hyps,
        )