in dataset-construction/src/ndb_data/construction/make_questions.py [0:0]
def generate_joins_extra(hf, facts, qid, question_template, s, r, o, is_subject):
    """Yield argmin/argmax questions extended with an extra join on ``s``.

    For every relation listed under ``r`` in the module-level mutation table
    (``extra_subjects`` when ``is_subject`` is true, otherwise
    ``extra_objects``) for which ``hf`` holds at least one key starting with
    ``(s, relation)``, one rewritten question is produced.

    Args:
        hf: Mapping keyed by tuples whose first three items are read as
            (subject, relation, object). Each value is an iterable that is
            merged into the yielded ``hyps`` set.
        facts: Not used by this function; kept for interface compatibility.
        qid: Question id string. Nothing is yielded unless it contains
            ``"argmin"`` or ``"argmax"``.
        question_template: Indexable pair; item 0 is the question text and
            item 1 the answer template, which must start with ``"$s"``.
        s: Subject id; always resolved through ``wiki.get_by_id_or_uri``.
        r: Relation of the base fact; selects the entry of the mutation table.
        o: Object of the base fact; resolved through ``wiki`` only when it
            starts with ``"Q"``, otherwise used verbatim.
        is_subject: Selects the mutation table and the ``subj``/``obj``
            suffix of the generated id.

    Yields:
        ``(new_id, qid_prefix, (question, answer), hyps)`` where ``answer`` is
        the filled-in answer template followed by ``" [SEP] "`` and the
        ``" [LIST] "``-joined names of the joined objects, and ``hyps`` is the
        union of ``hf[(s, r, o)]`` with ``hf[key]`` for every joined key.

    Note:
        If the question text does not contain one of the recognised openers
        ("Which", "What is the", "Who is", "Who has"), a message is printed
        and generation stops.
    """
    source_mutations = extra_subjects if is_subject else extra_objects
    if not (
        r in source_mutations
        and ("argmin" in qid or "argmax" in qid)
        and question_template[1].startswith("$s")
    ):
        return

    modifiers_by_rel = source_mutations[r]

    # One pass over hf, grouping the keys for subject `s` by relation, instead
    # of rescanning every key of hf once per candidate relation.
    triples_by_rel = {}
    for key in hf:
        if key[0] == s and key[1] in modifiers_by_rel:
            triples_by_rel.setdefault(key[1], []).append(key)
    if not triples_by_rel:
        # Nothing to join on: avoid the wiki lookups below altogether.
        return

    def display_name(value):
        # Values starting with "Q" are treated as ids known to `wiki`
        # (presumably Wikidata ids -- confirm); anything else is a literal.
        if value.startswith("Q"):
            return wiki.get_by_id_or_uri(value)["english_name"]
        return value

    # Everything below is independent of the joined relation, so compute once.
    subject_name = wiki.get_by_id_or_uri(s)["english_name"]
    object_name = display_name(o)
    answer_prefix = (
        question_template[1].replace("$s", subject_name).replace("$o", object_name)
    )
    stripped_template = (
        question_template[0]
        .replace("Which", "$XXT")
        .replace("What is the", "$XXT")
        .replace("Who is", "$XXT")
        .replace("Who has", "$XXT")
    )
    can_template = "$XXT" in stripped_template
    bare_question = stripped_template.replace("$XXT", "").replace("?", "")
    id_suffix = "subj" if is_subject else "obj"
    qid_prefix = qid.split("_")[0]

    # Iterate in the mutation table's key order so output order is unchanged.
    for additional_rel in modifiers_by_rel:
        found_sros = triples_by_rel.get(additional_rel)
        if not found_sros:
            continue

        additional_subj_name = " [LIST] ".join(
            display_name(sro[2]) for sro in found_sros
        )
        modifier = random.choice(modifiers_by_rel[additional_rel])

        # Checked after random.choice on purpose: the module RNG is consumed
        # exactly as before, so seeded runs stay reproducible.
        if not can_template:
            print("Unable to template ", question_template[0])
            return

        if "$X" in modifier:
            # The modifier wraps the remainder of the original question.
            newq = modifier.replace("$X", bare_question)
        else:
            # The modifier replaces the question opener.
            newq = stripped_template.replace("$XXT", modifier)

        extended_question = (
            newq,
            answer_prefix + " [SEP] " + additional_subj_name,
        )

        hyps = set(hf[(s, r, o)])
        for sro in found_sros:
            hyps.update(hf[sro])

        yield (
            f"{qid}_join_extra_{additional_rel}_{id_suffix}",
            qid_prefix,
            extended_question,
            hyps,
        )