in dataset-construction/src/ndb_data/construction/make_questions.py [0:0]
def generate_joins_filter(hf, facts, qid, question_template, s, r, o, is_subject):
    """Yield "join" variants of a question built from additional facts about ``s``.

    The placeholder of the selected side (``$s`` when ``is_subject`` is true,
    ``$o`` otherwise) is replaced by a modifier phrase drawn from the
    module-level ``additional_subjects`` / ``additional_objects`` tables. The
    modifier's ``$AO`` marker is filled with the object of a randomly chosen
    key in ``hf`` that has subject ``s`` and one of the additional relations
    listed for ``r``. The other placeholder is filled with the plain entity
    name.

    Args:
        hf: Mapping keyed by ``(subject, relation, object)`` tuples. Its
            values are combined with ``set(...).union(...)``; presumably they
            are collections of fact ids (verify against the caller).
        facts: Unused by this function; kept for interface compatibility.
        qid: Identifier of the source question. The part before the first
            ``"_"`` is passed through as the second element of each result.
        question_template: Sequence of question strings. Only the first one
            is inspected to decide whether the relevant placeholder occurs.
        s: Subject id; resolved through ``wiki`` when its name is needed.
        r: Relation id used to select the modifier table entry.
        o: Object; resolved through ``wiki`` only if it starts with ``"Q"``,
            otherwise used verbatim.
        is_subject: If true, modify the ``$s`` side; otherwise the ``$o`` side.

    Yields:
        Tuples ``(new_qid, base_qid, extended_question, hyps)`` where
        ``extended_question`` is the list of rewritten templates and ``hyps``
        is the union of ``hf[found_key]`` and ``hf[(s, r, o)]``.
    """
    # Nothing to do unless the side being modified appears in the question.
    placeholder = "$s" if is_subject else "$o"
    if placeholder not in question_template[0]:
        return

    source_mutations = additional_subjects if is_subject else additional_objects
    if r not in source_mutations:
        return
    modifiers_by_rel = source_mutations[r]

    # Group the relevant keys of ``hf`` in one pass instead of rescanning the
    # whole mapping for every candidate relation. Insertion order is kept, so
    # the lists handed to ``random.choice`` are the same as a per-relation
    # scan would produce.
    # NOTE(review): candidates are looked up under ``s`` even when
    # ``is_subject`` is False -- confirm this is intended rather than ``o``.
    matches_by_rel = {}
    for key in hf:
        if key[0] == s and key[1] in modifiers_by_rel:
            matches_by_rel.setdefault(key[1], []).append(key)
    if not matches_by_rel:
        return

    # Only the placeholder that is NOT replaced by the modifier needs a real
    # entity name, and it is the same for every generated question.
    if is_subject:
        fixed_name = (
            wiki.get_by_id_or_uri(o)["english_name"] if o.startswith("Q") else o
        )
    else:
        fixed_name = wiki.get_by_id_or_uri(s)["english_name"]

    side = "subj" if is_subject else "obj"
    base_qid = qid.split("_")[0]

    for additional_rel, modifier_templates in modifiers_by_rel.items():
        found_sros = matches_by_rel.get(additional_rel)
        if not found_sros:
            continue

        found_sro = random.choice(found_sros)
        additional_object = found_sro[2]
        if additional_object.startswith("Q"):
            additional_object_name = wiki.get_by_id_or_uri(additional_object)[
                "english_name"
            ]
        else:
            additional_object_name = additional_object

        modifier = random.choice(modifier_templates).replace(
            "$AO", additional_object_name
        )

        if is_subject:
            subject_text, object_text = modifier, fixed_name
        else:
            subject_text, object_text = fixed_name, modifier

        extended_question = [
            q.replace("$s", subject_text).replace("$o", object_text)
            for q in question_template
        ]
        hyps = set(hf[found_sro]).union(hf[(s, r, o)])

        yield (
            f"{qid}_join_{additional_rel}_{found_sro[2]}_{side}",
            base_qid,
            extended_question,
            hyps,
        )