in data_measurements/zipf/zipf.py [0:0]
def __init__(self, vocab_counts_df, count_str="count",
             proportion_str="prop"):
    """Set up the Zipf state from a table of vocabulary counts.

    Args:
        vocab_counts_df: Table indexable by column name whose
            ``count_str`` column exposes ``.values`` (presumably a pandas
            DataFrame -- confirm against callers), or ``None`` to create
            an empty object with no observed counts.
        count_str: Name of the column holding the raw counts.
        proportion_str: Name of the column holding the proportions; only
            stored here, not read.

    When a table is given, the distinct counts are stored in descending
    order so that the i-th entry of ``word_ranks_unique`` (1-based ranks)
    lines up with the i-th largest count.
    """
    self.vocab_counts_df = vocab_counts_df
    # Strings used in the input dictionary
    self.cnt_str = count_str
    self.prop_str = proportion_str
    # Fit parameters/statistics; unset (None) until assigned elsewhere.
    self.alpha = None
    self.xmin = None
    self.xmax = None
    self.p = None
    self.ks_distance = None
    self.observed_counts = None
    self.word_counts_unique = None
    self.word_ranks_unique = None
    if self.vocab_counts_df is not None:
        self.observed_counts = self.vocab_counts_df[self.cnt_str].values
        # A bare set iterates in hash-table order, which would pair ranks
        # with arbitrary counts; sort so that rank 1 is the largest count.
        self.word_counts_unique = sorted(
            set(self.observed_counts), reverse=True)
        self.word_ranks_unique = list(
            np.arange(1, len(self.word_counts_unique) + 1))
    # The rank/count entries reference the same list objects as the
    # attributes above (or None when no table was supplied).
    self.zipf_dict = {"xmin": None, "xmax": None, "alpha": None,
                      "ks_distance": None, "p-value": None,
                      "word_ranks_unique": self.word_ranks_unique,
                      "word_counts_unique": self.word_counts_unique}
    self.fit = None
    self.predicted_counts = None