in data_measurements/dataset_statistics.py [0:0]
def load_or_prepare_dataset(self, load_only=False):
    """
    Load the text dataset from its on-disk cache, or build it if permitted.

    If caching is enabled (`self.use_cache`) and something exists at
    `self.text_dset_fid`, the dataset is read from that path into
    `self.text_dset`. Otherwise, unless `load_only` is set,
    `self.prepare_text_dset()` is called and, when `self.save` is truthy,
    `self.text_dset` is written to `self.text_dset_fid`.

    When `load_only` is set and no usable cache is found, this method
    does nothing beyond the initial log line.

    Args:
        load_only: Whether only a cached dataset can be used.
    """
    logs.info("Doing text dset.")

    # Short-circuit keeps the path check from running when caching is off.
    cache_available = self.use_cache and exists(self.text_dset_fid)
    if cache_available:
        # Reuse the previously extracted text.
        self.text_dset = load_from_disk(self.text_dset_fid)
        logs.warning("Loaded dataset from disk")
        logs.warning(self.text_dset)
        return

    # No usable cache: the caller asked for cached data only, so stop here.
    if load_only:
        return

    # Build the dataset from scratch.
    # NOTE(review): presumably this populates self.text_dset -- confirm
    # against prepare_text_dset().
    self.prepare_text_dset()
    if not self.save:
        return

    # Persist the extracted text instances for later runs.
    logs.warning("Saving dataset to disk")
    self.text_dset.save_to_disk(self.text_dset_fid)