in jupyter/comparison-to-datasketch/cardinality_error_experiment.py [0:0]
def __init__(self, sketch_lgk: int, lg_trials: int, max_lgN: int) -> None:
    """Set up output locations, plot points and result buffers for one run.

    Args:
        sketch_lgk: Stored as ``self.sketch_lgk`` and embedded in the output
            file suffix (presumably log2 of the sketch size k -- confirm
            against the code that builds the sketches).
        lg_trials: Base-2 logarithm of the number of trials; the experiment
            uses ``2 ** lg_trials`` trials per plot point.
        max_lgN: Base-2 logarithm of the maximum number of distinct items;
            ``self.max_num_distincts`` is ``2 ** max_lgN`` as a ``np.uint64``.

    Side effects:
        Creates the directory ``hll_accuracy_profile_<YYYYMMDD>`` in the
        current working directory if it does not already exist, and prints
        the plot points and the shape of ``self.data``.
    """
    self.sketch_lgk = sketch_lgk
    self.num_trials = 2 ** lg_trials
    self.max_lgN = max_lgN
    self.max_num_distincts = np.uint64(2 ** self.max_lgN)

    # Sample the clock once so the directory date and the file-suffix time
    # always describe the same instant (two separate calls could straddle
    # midnight and disagree).
    now = datetime.today()
    self.directory_name = "hll_accuracy_profile_" + now.strftime('%Y%m%d')
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(self.directory_name, exist_ok=True)
    self.file_extension = "_" + now.strftime('%H%M') + f"lgK_{self.sketch_lgk}_lgT_{lg_trials}"

    # Need to remove repeated items for the program logic in self.run()
    # NOTE(review): the generator is invoked twice and the results merged
    # before de-duplication; this looks redundant if the helper is
    # deterministic, but it is kept as-is because the helper is not visible
    # here -- confirm and drop the second call if safe.
    self.plot_points = self._generate_plot_points()
    self.plot_points.extend(self._generate_plot_points())
    self.plot_points = sorted(set(self.plot_points))
    print(self.plot_points)

    # Initialise the data structures for results: one row per plot point,
    # one column per trial.
    self.DataSketches_results_arr = np.zeros((len(self.plot_points), self.num_trials), dtype=float)
    self.datasketch_results_arr = np.zeros_like(self.DataSketches_results_arr)
    self.DataSketches_results_df = pd.DataFrame(index=self.plot_points, columns=None)
    self.datasketch_results_df = pd.DataFrame(index=self.plot_points, columns=None)

    # Standard-normal random matrix with the same shape as the result arrays
    # (its use is not visible in this method).
    self.data = np.random.randn(len(self.plot_points), self.num_trials)
    print("Data shape: ", self.data.shape)