in benchmarks/run_benchmarks.py [0:0]
def run(self, n_repetitions=1, verbose=False):
    """Run every configured benchmark and collect timings and residual norms.

    Iterates over ``self.get_config_combinations()``. For each combination
    the problem data is resolved, ``self.compute_fit_time_and_residual`` is
    called, and its two results are handed to ``self.book_keeping`` together
    with the two accumulators created here.

    Parameters
    ----------
    n_repetitions : int, default 1
        Forwarded unchanged to ``self.compute_fit_time_and_residual``.
    verbose : bool, default False
        If true, print the raw residual-norm accumulator before it is
        converted into a dataframe.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(times_df, residual_norms_df)``. Both frames are also stored on the
        instance as ``self.times_df`` and ``self.residual_norms_df``.

    Raises
    ------
    ValueError
        If a problem is neither a known dataset nor a supported
        random-problem / sparse-format pair.
    """
    benchmark_times = defaultdict(list)
    benchmark_residual_norms = {}

    for (
        sparse_format,
        problem,
        solver,
        sketch_size,
        operator_mode,
        algo_mode,
        accel_param,
    ) in tqdm(self.get_config_combinations()):
        X, y = self._load_problem_data(problem, sparse_format)

        # NOTE(review): the comment that used to sit here said the data is
        # converted to dense for the direct solver and conjugate gradients,
        # with the conversion excluded from timing. No conversion happens in
        # this method; presumably it lives inside
        # compute_fit_time_and_residual -- confirm there.
        (
            times_distribution,
            residual_norms_distribution,
        ) = self.compute_fit_time_and_residual(
            X,
            y,
            problem,
            sparse_format,
            solver,
            sketch_size,
            operator_mode,
            algo_mode,
            accel_param,
            n_repetitions=n_repetitions,
        )

        # book_keeping receives both accumulators; presumably it records this
        # configuration's results into them in place -- verify in book_keeping.
        self.book_keeping(
            problem,
            sparse_format,
            solver,
            sketch_size,
            operator_mode,
            algo_mode,
            accel_param,
            residual_norms_distribution,
            times_distribution,
            benchmark_times,
            benchmark_residual_norms,
        )

    times_df = pd.DataFrame(benchmark_times)
    if verbose:
        print(benchmark_residual_norms)
    # Build with one row per key and then transpose so every key ends up as a
    # column (presumably because the per-key sequences can differ in length,
    # which the plain DataFrame constructor would reject -- TODO confirm).
    residual_norms_df = pd.DataFrame.from_dict(
        benchmark_residual_norms, orient="index"
    ).transpose()

    self.times_df = times_df
    self.residual_norms_df = residual_norms_df
    return times_df, residual_norms_df


def _load_problem_data(self, problem, sparse_format):
    """Return the ``(X, y)`` pair for one benchmark problem.

    A problem name is first mapped to a dataset class name by dropping an
    optional ``"_small"`` suffix, CamelCasing the remaining underscore-separated
    parts and appending ``"Dataset"``. If that name is listed in
    ``DATASET_CLASSES`` the class is looked up on ``data_loaders`` and its
    ``load_X_y()`` result is returned; ``sparse_format`` is not consulted in
    that case. Otherwise the pre-generated random matrices stored on the
    instance are used.

    Raises
    ------
    ValueError
        If ``problem`` is not a known dataset and the
        ``(problem, sparse_format)`` pair is not one of the supported
        random-problem combinations.
    """
    small_suffix = "_small"
    # The "_small" variant is passed to the dataset class as is_small=True.
    is_small = problem.endswith(small_suffix)
    base_name = problem[: -len(small_suffix)] if is_small else problem
    dataset_name = (
        "".join(part.capitalize() for part in base_name.split("_")) + "Dataset"
    )

    if dataset_name in DATASET_CLASSES:
        # Problem involves a real dataset.
        dataset_class = getattr(data_loaders, dataset_name)
        dataset = dataset_class(is_small=is_small)
        X, y = dataset.load_X_y()
        return X, y

    random_problems = ("primal_random", "dual_random")
    random_formats = ("csc", "csr", "dense")
    if problem in random_problems and sparse_format in random_formats:
        kind = problem[: -len("_random")]  # "primal" or "dual"
        # Attributes are fetched lazily, only for the requested combination.
        X = getattr(self, f"X_{kind}_{sparse_format}_random")
        y = getattr(self, f"y_{kind}_random")
        return X, y

    # This branch is also reached for a valid random problem paired with an
    # unsupported sparse format, so report both values.
    raise ValueError(
        f"Problem type {problem} not supported (sparse_format={sparse_format!r})"
    )