def generate_substitutions()

in src/generate_substitutions.py [0:0]


def generate_substitutions(args):
    """Apply a substitution function to a preprocessed dataset and save the result.

    The dataset name is the basename of ``args.inpath`` up to its first ".",
    and is used to load the preprocessed ``QADataset``. The substitution
    function is looked up in ``SUBSTITUTION_FNS`` under ``args.substitution``
    and called with the dataset, ``args.wikidata`` and every further argument
    in ``args`` whose name matches a parameter of that function.

    The output is written to ``args.outpath`` as a JSONLINES file: a header
    line ``{"dataset": "<dset_name>-<substitution>"}`` followed by one
    substituted example per line.

    Args:
        args: Namespace-like object (it must support ``vars()``) providing
            ``inpath``, ``substitution``, ``wikidata``, ``outpath`` and
            ``save_full``, plus any optional arguments accepted by the
            selected substitution function.
    """
    dset_name = os.path.basename(args.inpath).split(".")[0]
    preprocessed_dataset = QADataset.load(dset_name)

    sub_fn = SUBSTITUTION_FNS[args.substitution]
    # Only pass in the arguments from args that are identically named in the
    # function signature.
    # NOTE(review): the dataset and args.wikidata are passed positionally, so a
    # function whose first two parameters share a name with an attribute of
    # args would receive that argument twice -- confirm no such function exists.
    arg_values = vars(args)
    param_names = inspect.signature(sub_fn).parameters
    sub_kwargs = {
        name: arg_values[name] for name in param_names if name in arg_values
    }
    sub_exs = sub_fn(preprocessed_dataset, args.wikidata, **sub_kwargs)

    # Write final substitution set to args.outpath.
    # os.path.dirname() returns "" for a bare filename, and os.makedirs("")
    # raises FileNotFoundError, so only create the directory when there is one.
    out_dir = os.path.dirname(args.outpath)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(args.outpath, "w", encoding="utf-8") as outf:
        header = {"dataset": f"{dset_name}-{args.substitution}"}
        outf.write(json.dumps(header) + "\n")
        for ex in sub_exs:
            outf.write(json.dumps(ex.json_dump(save_full=args.save_full)) + "\n")