in src/generate_substitutions.py [0:0]
def generate_substitutions(args):
    """Apply the selected substitution function to a dataset and save the result.

    The dataset name is derived from the file name of ``args.inpath`` (everything
    before the first "."), and the dataset is loaded via ``QADataset.load``. The
    substitution function is looked up in ``SUBSTITUTION_FNS`` under
    ``args.substitution`` and called with the dataset and ``args.wikidata`` as its
    two leading positional arguments, plus every attribute of ``args`` whose name
    matches one of the function's remaining parameters.

    The output is written to ``args.outpath`` as JSON lines: a header object
    ``{"dataset": "<dset_name>-<substitution>"}`` followed by one line per
    substituted example, produced by ``ex.json_dump(save_full=args.save_full)``.

    Args:
        args: Namespace-like object providing at least ``inpath``,
            ``substitution``, ``wikidata``, ``outpath`` and ``save_full``.
    """
    # Dataset name is the input file name truncated at its first ".".
    dset_name = os.path.basename(args.inpath).split(".")[0]
    preprocessed_dataset = QADataset.load(dset_name)
    sub_fn = SUBSTITUTION_FNS[args.substitution]

    # Only pass in the arguments from args that are identically named in the
    # function signature.
    arg_values = vars(args)
    params = list(inspect.signature(sub_fn).parameters.values())
    # The first two positional slots are filled explicitly below; forwarding
    # them again by keyword would raise "got multiple values for argument".
    positional_kinds = (
        inspect.Parameter.POSITIONAL_ONLY,
        inspect.Parameter.POSITIONAL_OR_KEYWORD,
    )
    bound_positionally = {p.name for p in params[:2] if p.kind in positional_kinds}
    extra_kwargs = {
        p.name: arg_values[p.name]
        for p in params
        if p.name in arg_values and p.name not in bound_positionally
    }
    sub_exs = sub_fn(preprocessed_dataset, args.wikidata, **extra_kwargs)

    # Write final substitution set to args.outpath. A bare file name has an
    # empty dirname, and os.makedirs("") raises FileNotFoundError, so only
    # create the directory when there is one.
    out_dir = os.path.dirname(args.outpath)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(args.outpath, "w", encoding="utf-8") as outf:
        # Header line identifying the dataset/substitution pair.
        json.dump({"dataset": f"{dset_name}-{args.substitution}"}, outf)
        outf.write("\n")
        for ex in sub_exs:
            json.dump(ex.json_dump(save_full=args.save_full), outf)
            outf.write("\n")