in data_measurements/dataset_statistics.py [0:0]
def load_or_prepare_zipf(self, load_only=False):
    """Load the Zipf statistics and figure from the cache, or compute them.

    When ``self.use_cache`` is set and the Zipf statistics JSON file exists,
    the statistics are read from it into ``self.z``. The figure is then read
    from its own JSON file if present; otherwise it is rebuilt from the
    loaded statistics (unless ``load_only`` is set).

    When the cached statistics are not used, ``self.prepare_zipf()`` is
    called (unless ``load_only`` is set) and, if ``self.save`` is set, the
    statistics and figure are written to the cache directory.

    Args:
        load_only: If True, only read what is already cached; nothing is
            computed or written. In that case ``self.z`` and/or
            ``self.zipf_fig`` may not be assigned by this call.
    """
    zipf_json_fid, zipf_fig_json_fid, zipf_fig_html_fid = zipf.get_zipf_fids(
        self.dataset_cache_dir
    )
    if self.use_cache and exists(zipf_json_fid):
        # Zipf statistics
        # Read Zipf statistics: Alpha, p-value, etc.
        with open(zipf_json_fid, "r") as f:
            zipf_dict = json.load(f)
        self.z = zipf.Zipf(self.vocab_counts_df)
        self.z.load(zipf_dict)
        # Zipf figure
        if exists(zipf_fig_json_fid):
            self.zipf_fig = ds_utils.read_plotly(zipf_fig_json_fid)
        elif not load_only:
            # Statistics were cached but the figure was not: rebuild it.
            self.zipf_fig = zipf.make_zipf_fig(self.z)
            if self.save:
                # Pass the destination path so the rebuilt figure is
                # written where the branch above looks for it next time.
                ds_utils.write_plotly(self.zipf_fig, zipf_fig_json_fid)
    elif not load_only:
        # NOTE(review): prepare_zipf() is presumed to set both self.z and
        # self.zipf_fig, since both are read below -- confirm.
        self.prepare_zipf()
        if self.save:
            zipf_dict = self.z.get_zipf_dict()
            ds_utils.write_json(zipf_dict, zipf_json_fid)
            ds_utils.write_plotly(self.zipf_fig, zipf_fig_json_fid)
            self.zipf_fig.write_html(zipf_fig_html_fid)