def __init__()

in jupyter/comparison-to-datasketch/cardinality_error_experiment.py [0:0]


    def __init__(self, sketch_lgk: int, lg_trials: int, max_lgN: int) -> None:
        """Set up parameters, output location and result buffers for one experiment.

        Args:
            sketch_lgk: Stored unchanged as ``self.sketch_lgk`` and embedded in
                the output file suffix (presumably log2 of the sketch size k --
                confirm against the code that builds the sketches).
            lg_trials: Base-2 logarithm of the number of trials; ``2**lg_trials``
                trials are recorded per plot point.
            max_lgN: Base-2 logarithm of the largest number of distinct items;
                ``self.max_num_distincts`` is ``2**max_lgN`` as a ``np.uint64``.

        Side effects:
            Creates the directory ``hll_accuracy_profile_<YYYYMMDD>`` relative to
            the current working directory if it does not already exist, and
            prints the plot points and the shape of ``self.data`` to stdout.
        """
        self.sketch_lgk = sketch_lgk
        self.num_trials = 2**lg_trials
        self.max_lgN = max_lgN
        self.max_num_distincts = np.uint64(2 ** self.max_lgN)

        # Take a single timestamp so the date in the directory name and the
        # time in the file suffix always describe the same instant, even when
        # construction happens around midnight.
        now = datetime.today()
        self.directory_name = "hll_accuracy_profile_" + now.strftime('%Y%m%d')
        # exist_ok avoids the check-then-create race when two runs start at
        # the same time (the second one would otherwise hit FileExistsError).
        os.makedirs(self.directory_name, exist_ok=True)
        self.file_extension = "_" + now.strftime('%H%M') + f"lgK_{self.sketch_lgk}_lgT_{lg_trials}"

        # Need to remove repeated items for the program logic in self.run()
        # NOTE(review): the generator is called twice and the results merged;
        # if it is deterministic the second call adds nothing -- confirm and
        # simplify. Kept as-is here to preserve behaviour.
        self.plot_points = self._generate_plot_points()
        self.plot_points.extend(self._generate_plot_points())
        self.plot_points = sorted(set(self.plot_points))
        print(self.plot_points)

        # Initialise the data structures for results: one row per plot point,
        # one column per trial (presumably one array/frame pair per library
        # under comparison -- confirm against self.run()).
        self.DataSketches_results_arr = np.zeros((len(self.plot_points), self.num_trials), dtype=float)
        self.datasketch_results_arr = np.zeros_like(self.DataSketches_results_arr)
        self.DataSketches_results_df = pd.DataFrame(index=self.plot_points, columns=None)
        self.datasketch_results_df = pd.DataFrame(index=self.plot_points, columns=None)
        # Standard-normal samples with the same (plot point, trial) shape.
        self.data = np.random.randn(len(self.plot_points), self.num_trials)
        print("Data shape: ", self.data.shape)