experiments/compare_conjugate_gradient.py [35:232]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    BostonDataset,
    CaliforniaHousingDataset,
    YearPredictionDataset,
    Rcv1Dataset,
)
from ridge_sketch import RidgeSketch
from kernel_ridge_sketch import KernelRidgeSketch
from benchmarks import compute_distribution, pad_residual_norms, update_times
from experiments.plot_experiments import plot_runs_over_iterations, plot_runs_over_time


class ExperimentSketches:
    """
    Class to run the experiments for different sketches
    """

    def __init__(
        self,
        dataset_name,
        X,
        y,
        regularizer,
        is_kernel,
        use_heuristic,
        tolerance,
        max_iter,
        solvers,
        sketch_size,
        sketch_size_formula,
        n_repetitions,
    ):
        self.dataset_name = dataset_name
        self.X = X
        self.y = y
        self.regularizer = regularizer
        self.is_kernel = is_kernel
        self.use_heuristic = use_heuristic
        self.tolerance = tolerance
        self.max_iter = max_iter

        # List of sketch solvers
        self.solvers = solvers

        # Only one sketch size here
        self.sketch_size = sketch_size
        self.sketch_size_formula = sketch_size_formula

        # For error areas 1st/3rd quartiles
        self.n_repetitions = n_repetitions

        # df of times of all runs of the experiment
        self.times_df = None

        # df of relative residual norms of all runs of the experiment
        self.residual_norms_df = None

    def run_full_exp(self, verbose=True):
        algo_mode = "mom"
        mom_beta = None
        step_size = None
        mom_eta = 0.995  # increasing momentum parameter

        # dict of the outputs
        times = defaultdict(list)
        residual_norms = {}
        counter = 1
        n_settings = len(self.solvers)
        for solver in self.solvers:
            if solver == "coordinate descent":
                sketch_size = 1
                sketch_size_formula = "1"
            else:
                sketch_size = self.sketch_size
                sketch_size_formula = self.sketch_size_formula

            run_name = self.dataset_name  # solver
            print(f"----> Setting {counter} over {n_settings} : {solver}")
            (
                times_distribution,
                residual_norms_distribution,
            ) = self.compute_fit_time_and_residual(
                solver, sketch_size, algo_mode, step_size, mom_beta, mom_eta,
            )

            # Storing the results
            self.book_keeping(
                run_name,
                solver,
                sketch_size_formula,
                sketch_size,
                times_distribution,
                residual_norms_distribution,
                times,
                residual_norms,
            )
            counter += 1
            print("\n")

        # converting the outputs to dataframes
        times_df = pd.DataFrame(times)
        if verbose:
            print(f"residual_norms:\n{residual_norms}")
        residual_norms_df = pd.DataFrame.from_dict(
            residual_norms, orient="index"
        ).transpose()

        self.times_df = times_df
        self.residual_norms_df = residual_norms_df

        return times_df, residual_norms_df

    def compute_fit_time_and_residual(
        self, solver, sketch_size, algo_mode, step_size, mom_beta, mom_eta
    ):
        """
        Fits a freshly loaded model ``self.n_repetitions`` times.

        Repetition ``i`` uses ``i`` as seed, both for numpy's global random
        generator and as the random state handed to ``load_model``.
        Only the call to ``model.fit`` is timed.

        Returns ``(times_distribution, residual_norms_distribution)``:
        ``compute_distribution`` applied to the fit times and to the padded
        ``model.residual_norms`` of all repetitions (median and 1st/3rd
        quartiles are read from them in ``book_keeping``).
        """
        fit_times = []
        norms_per_run = []
        total = self.n_repetitions
        for seed in range(total):
            # Progress line, overwritten in place at each repetition
            print(f"--------> Repetition {seed + 1} / {total}", end="\r")

            np.random.seed(seed)
            model = load_model(
                solver,
                sketch_size,
                algo_mode,
                step_size,
                mom_beta,
                mom_eta,
                self.regularizer,
                self.is_kernel,
                self.use_heuristic,
                self.tolerance,
                self.max_iter,
                seed,
            )

            # Time the fit alone, model construction is excluded
            tic = default_timer()
            model.fit(self.X, self.y)
            elapsed = default_timer() - tic

            fit_times.append(elapsed)
            norms_per_run.append(model.residual_norms)

        times_distribution = compute_distribution(fit_times)
        # Runs are padded before being aggregated together
        # NOTE(review): presumably to equalize their lengths - confirm in
        # benchmarks.pad_residual_norms
        padded_norms = pad_residual_norms(norms_per_run)
        residual_norms_distribution = compute_distribution(padded_norms)

        return times_distribution, residual_norms_distribution

    def book_keeping(
        self,
        run_name,
        solver,
        sketch_size_formula,
        sketch_size,
        times_distribution,
        residual_norms_distribution,
        run_times,
        run_residual_norms,
    ):
        """
        Records the time and relative residual norms distributions of a solver.

        The median and 1st/3rd quartiles of the fit time, together with the
        description of the run, are passed to ``update_times`` along with
        ``run_times``. The three residual norms series are added to
        ``run_residual_norms`` under keys prefixed by the run description.

        Nothing is returned: results are accumulated in the two containers.
        """
        label = (
            f"{run_name} | {solver} | sketch_size = "
            f"{sketch_size_formula} = {sketch_size}"
        )

        timing_row = {
            "run_name": run_name,
            "solver": solver,
            "sketch_size_formula": sketch_size_formula,
            "sketch_size": sketch_size,
            "time (median)": times_distribution.median,
            "time (1st quartile)": times_distribution.q1,
            "time (3rd quartile)": times_distribution.q3,
        }
        # The returned value is not propagated to the caller
        # NOTE(review): relies on update_times filling run_times in place - confirm
        update_times(run_times, timing_row)

        quartile_series = (
            ("median", residual_norms_distribution.median),
            ("1st quartile", residual_norms_distribution.q1),
            ("3rd quartile", residual_norms_distribution.q3),
        )
        run_residual_norms.update(
            {
                f"{label} | residual_norms ({quartile})": series
                for quartile, series in quartile_series
            }
        )

    def save(self, exp_name, save_path):
        full_path = os.path.join(save_path, exp_name)
        # clear old results
        shutil.rmtree(full_path, ignore_errors=True)
        os.makedirs(full_path)
        self.save_settings(exp_name, full_path)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



experiments/different_sketches.py [39:236]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    BostonDataset,
    CaliforniaHousingDataset,
    YearPredictionDataset,
    Rcv1Dataset,
)
from ridge_sketch import RidgeSketch
from kernel_ridge_sketch import KernelRidgeSketch
from benchmarks import compute_distribution, pad_residual_norms, update_times
from experiments.plot_experiments import plot_runs_over_iterations, plot_runs_over_time


class ExperimentSketches:
    """
    Class to run the experiments for different sketches
    """

    def __init__(
        self,
        dataset_name,
        X,
        y,
        regularizer,
        is_kernel,
        use_heuristic,
        tolerance,
        max_iter,
        solvers,
        sketch_size,
        sketch_size_formula,
        n_repetitions,
    ):
        self.dataset_name = dataset_name
        self.X = X
        self.y = y
        self.regularizer = regularizer
        self.is_kernel = is_kernel
        self.use_heuristic = use_heuristic
        self.tolerance = tolerance
        self.max_iter = max_iter

        # List of sketch solvers
        self.solvers = solvers

        # Only one sketch size here
        self.sketch_size = sketch_size
        self.sketch_size_formula = sketch_size_formula

        # For error areas 1st/3rd quartiles
        self.n_repetitions = n_repetitions

        # df of times of all runs of the experiment
        self.times_df = None

        # df of relative residual norms of all runs of the experiment
        self.residual_norms_df = None

    def run_full_exp(self, verbose=True):
        algo_mode = "mom"
        mom_beta = None
        step_size = None
        mom_eta = 0.995  # increasing momentum parameter

        # dict of the outputs
        times = defaultdict(list)
        residual_norms = {}
        counter = 1
        n_settings = len(self.solvers)
        for solver in self.solvers:
            if solver == "coordinate descent":
                sketch_size = 1
                sketch_size_formula = "1"
            else:
                sketch_size = self.sketch_size
                sketch_size_formula = self.sketch_size_formula

            run_name = self.dataset_name  # solver
            print(f"----> Setting {counter} over {n_settings} : {solver}")
            (
                times_distribution,
                residual_norms_distribution,
            ) = self.compute_fit_time_and_residual(
                solver, sketch_size, algo_mode, step_size, mom_beta, mom_eta,
            )

            # Storing the results
            self.book_keeping(
                run_name,
                solver,
                sketch_size_formula,
                sketch_size,
                times_distribution,
                residual_norms_distribution,
                times,
                residual_norms,
            )
            counter += 1
            print("\n")

        # converting the outputs to dataframes
        times_df = pd.DataFrame(times)
        if verbose:
            print(f"residual_norms:\n{residual_norms}")
        residual_norms_df = pd.DataFrame.from_dict(
            residual_norms, orient="index"
        ).transpose()

        self.times_df = times_df
        self.residual_norms_df = residual_norms_df

        return times_df, residual_norms_df

    def compute_fit_time_and_residual(
        self, solver, sketch_size, algo_mode, step_size, mom_beta, mom_eta
    ):
        """
        Fits a freshly loaded model ``self.n_repetitions`` times.

        Repetition ``i`` uses ``i`` as seed, both for numpy's global random
        generator and as the random state handed to ``load_model``.
        Only the call to ``model.fit`` is timed.

        Returns ``(times_distribution, residual_norms_distribution)``:
        ``compute_distribution`` applied to the fit times and to the padded
        ``model.residual_norms`` of all repetitions (median and 1st/3rd
        quartiles are read from them in ``book_keeping``).
        """
        fit_times = []
        norms_per_run = []
        total = self.n_repetitions
        for seed in range(total):
            # Progress line, overwritten in place at each repetition
            print(f"--------> Repetition {seed + 1} / {total}", end="\r")

            np.random.seed(seed)
            model = load_model(
                solver,
                sketch_size,
                algo_mode,
                step_size,
                mom_beta,
                mom_eta,
                self.regularizer,
                self.is_kernel,
                self.use_heuristic,
                self.tolerance,
                self.max_iter,
                seed,
            )

            # Time the fit alone, model construction is excluded
            tic = default_timer()
            model.fit(self.X, self.y)
            elapsed = default_timer() - tic

            fit_times.append(elapsed)
            norms_per_run.append(model.residual_norms)

        times_distribution = compute_distribution(fit_times)
        # Runs are padded before being aggregated together
        # NOTE(review): presumably to equalize their lengths - confirm in
        # benchmarks.pad_residual_norms
        padded_norms = pad_residual_norms(norms_per_run)
        residual_norms_distribution = compute_distribution(padded_norms)

        return times_distribution, residual_norms_distribution

    def book_keeping(
        self,
        run_name,
        solver,
        sketch_size_formula,
        sketch_size,
        times_distribution,
        residual_norms_distribution,
        run_times,
        run_residual_norms,
    ):
        """
        Records the time and relative residual norms distributions of a solver.

        The median and 1st/3rd quartiles of the fit time, together with the
        description of the run, are passed to ``update_times`` along with
        ``run_times``. The three residual norms series are added to
        ``run_residual_norms`` under keys prefixed by the run description.

        Nothing is returned: results are accumulated in the two containers.
        """
        label = (
            f"{run_name} | {solver} | sketch_size = "
            f"{sketch_size_formula} = {sketch_size}"
        )

        timing_row = {
            "run_name": run_name,
            "solver": solver,
            "sketch_size_formula": sketch_size_formula,
            "sketch_size": sketch_size,
            "time (median)": times_distribution.median,
            "time (1st quartile)": times_distribution.q1,
            "time (3rd quartile)": times_distribution.q3,
        }
        # The returned value is not propagated to the caller
        # NOTE(review): relies on update_times filling run_times in place - confirm
        update_times(run_times, timing_row)

        quartile_series = (
            ("median", residual_norms_distribution.median),
            ("1st quartile", residual_norms_distribution.q1),
            ("3rd quartile", residual_norms_distribution.q3),
        )
        run_residual_norms.update(
            {
                f"{label} | residual_norms ({quartile})": series
                for quartile, series in quartile_series
            }
        )

    def save(self, exp_name, save_path):
        full_path = os.path.join(save_path, exp_name)
        # clear old results
        shutil.rmtree(full_path, ignore_errors=True)
        os.makedirs(full_path)
        self.save_settings(exp_name, full_path)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



