in data_measurements/dataset_statistics.py [0:0]
def load_or_prepare_tokenized_df(self, load_only=False):
    """Loads the tokenized dataframe from the cache when available;
    otherwise tokenizes the text dataset and optionally writes it back out."""
    if self.use_cache and exists(self.tokenized_df_fid):
        # Cache hit: reuse the tokenized dataframe saved by a previous run.
        self.tokenized_df = ds_utils.read_df(self.tokenized_df_fid)
    elif not load_only:
        # Cache miss: tokenize all text instances.
        self.tokenized_df = Tokenize(self.text_dset, feature=TEXT_FIELD,
                                     tok_feature=TOKENIZED_FIELD).get_df()
        logs.info("tokenized df is\n%s", self.tokenized_df)
        if self.save:
            logs.warning("Saving tokenized dataset to disk")
            # Persist the tokenized text so later runs can load it from cache.
            ds_utils.write_df(self.tokenized_df, self.tokenized_df_fid)
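
For context, the method above follows a common load-or-prepare caching pattern: check for a serialized artifact on disk, recompute only on a cache miss, and persist the result when saving is enabled. Below is a minimal, self-contained sketch of that pattern; the `load_or_prepare` function, `cache_path` argument, `prepare` callable, and feather-based I/O are illustrative assumptions, standing in for this file's `tokenized_df_fid`, `Tokenize`, and `ds_utils` helpers rather than reproducing them.

# Minimal sketch of the load-or-prepare pattern (hypothetical helpers, not
# this repository's API). `prepare` is any zero-argument callable that
# builds the dataframe; feather I/O stands in for ds_utils.read_df/write_df.
import logging
from os.path import exists

import pandas as pd

logs = logging.getLogger(__name__)


def load_or_prepare(cache_path, prepare, use_cache=True, save=True,
                    load_only=False):
    if use_cache and exists(cache_path):
        # Cache hit: read the result computed by a previous run.
        return pd.read_feather(cache_path)
    if load_only:
        # Caller only wants cached results; report a miss with None.
        return None
    df = prepare()  # the expensive step, e.g. tokenization
    if save:
        logs.warning("Saving prepared dataframe to %s", cache_path)
        # Feather requires a default RangeIndex, so reset before writing.
        df.reset_index(drop=True).to_feather(cache_path)
    return df

In `load_or_prepare_tokenized_df`, `prepare` corresponds to `Tokenize(self.text_dset, ...).get_df()` and `cache_path` to `self.tokenized_df_fid`.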