in src/classes/qadataset.py [0:0]
def save(self) -> None:
    """Save the preprocessed dataset to a gzip-compressed JSON Lines file.

    The file is written to ``self.preprocessed_path``. Its first line is a
    header object holding ``self.name`` (key ``"dataset"``) and
    ``self.original_path`` (key ``"original_path"``); every following line is
    the JSON encoding of ``ex.json_dump()`` for one item of ``self.examples``,
    in iteration order. Can be loaded using `self.load()`.

    Raises:
        OSError: If the target directory or file cannot be created or written.
        TypeError: If the header values or an example's ``json_dump()`` result
            are not JSON-serializable.
    """
    # dirname() is "" for a bare filename, and os.makedirs("") raises
    # FileNotFoundError -- only create the parent when there is one.
    parent_dir = os.path.dirname(self.preprocessed_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    header = {"dataset": self.name, "original_path": self.original_path}
    # Pin the text encoding so the file does not depend on the platform locale.
    with gzip.open(self.preprocessed_path, "wt", encoding="utf-8") as outf:
        outf.write(json.dumps(header) + "\n")
        for ex in self.examples:
            outf.write(json.dumps(ex.json_dump()) + "\n")
    print(f"Saved preprocessed dataset to {self.preprocessed_path}")