def load_or_prepare_tokenized_df()

in data_measurements/dataset_statistics.py
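
This method loads the tokenized dataframe from the on-disk cache when caching is enabled and a cached file exists; otherwise, unless `load_only` is set, it tokenizes every text instance and, if saving is enabled, writes the result back to disk.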


    def load_or_prepare_tokenized_df(self, load_only=False):
        """Load the tokenized dataframe from the cache, or compute it.

        If caching is enabled and a cached file exists, read it from disk.
        Otherwise, unless `load_only` is True, tokenize every text instance
        and (when saving is enabled) write the result back to the cache.
        """
        if self.use_cache and exists(self.tokenized_df_fid):
            self.tokenized_df = ds_utils.read_df(self.tokenized_df_fid)
        elif not load_only:
            # Tokenize all text instances into a new dataframe column.
            self.tokenized_df = Tokenize(self.text_dset, feature=TEXT_FIELD,
                                         tok_feature=TOKENIZED_FIELD).get_df()
            logs.info("Tokenized dataframe:\n%s", self.tokenized_df)
            if self.save:
                logs.warning("Saving tokenized dataset to disk")
                # Cache the tokenized text for later runs.
                ds_utils.write_df(self.tokenized_df, self.tokenized_df_fid)
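
The surrounding class is not shown in this excerpt, so here is a minimal self-contained sketch of the same load-or-prepare caching pattern. The whitespace tokenizer, the feather-backed read_df/write_df helpers, and the "text"/"tokenized_text" column names are assumptions made for the example, not the tool's actual implementation.

    import logging
    from os.path import exists

    import pandas as pd

    logs = logging.getLogger(__name__)

    TEXT_FIELD = "text"                 # assumed column names; the real
    TOKENIZED_FIELD = "tokenized_text"  # constants live elsewhere in the repo


    def read_df(fid):
        # Assumed stand-in for ds_utils.read_df: feather-backed load.
        return pd.read_feather(fid)


    def write_df(df, fid):
        # Assumed stand-in for ds_utils.write_df: feather-backed save.
        # Feather requires a default index, hence the reset.
        df.reset_index(drop=True).to_feather(fid)


    def load_or_prepare(fid, texts, use_cache=True, save=True, load_only=False):
        """Return a tokenized dataframe, preferring the on-disk cache."""
        if use_cache and exists(fid):
            return read_df(fid)
        if load_only:
            # Caller only wants cached results; skip tokenization entirely.
            return None
        # Hypothetical tokenizer: a simple whitespace split per instance.
        df = pd.DataFrame({
            TEXT_FIELD: texts,
            TOKENIZED_FIELD: [t.split() for t in texts],
        })
        if save:
            logs.info("Saving tokenized dataset to %s", fid)
            write_df(df, fid)
        return df

For example, load_or_prepare("tokenized.feather", ["a b c", "d e"]) tokenizes on the first call and reads the cached feather file on subsequent calls, mirroring the use_cache branch above.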