in ridge_sketch.py [0:0]
def fit(self, X, y):
    """
    Fit the ridge regression model with the configured solver.

    Solves
        min_w ||y - Xw||^2 + alpha * ||w||^2
    using either a scikit-learn solver or one of the sketching solvers,
    depending on ``self.solver``.

    Side effects:
        * scikit-learn solvers: sets ``coef_`` (as a column vector) and
          ``intercept_``, and appends the relative residual of the
          corresponding linear system to ``residual_norms``.
        * sketch solvers: optionally augments ``X`` with an intercept
          column and delegates the rest to ``sketch_solver_setup``.

    Args:
        X (2D numpy.ndarray): data matrix (n_samples, n_features)
        y (numpy.ndarray): labels, expected as a float column
            (n_samples, 1). Integer labels are cast to float64 and
            1D / single-row arrays are reshaped into a column; a warning
            is emitted in both cases.

    Raises:
        NotImplementedError: if ``self.solver`` is neither a scikit-learn
            solver nor a sketch solver.
    """
    # Integer labels are a property of the array dtype, so check the dtype
    # instead of scanning every label in Python. Object arrays can hold
    # mixed Python values, so only those still need the per-element scan.
    if y.dtype == object:
        has_integer_labels = any(
            isinstance(label, (int, np.integer)) for label in y.ravel()
        )
    else:
        has_integer_labels = np.issubdtype(y.dtype, np.integer)

    if has_integer_labels:
        y = y.astype(np.float64)
        warn(
            "Vector of labels must be an array of floats, it has been automatically converted."
        )

    if y.ndim == 1 or y.shape[0] == 1:
        column = y.reshape(-1, 1)
        # Only warn when the shape really changed: a (1, 1) array is
        # already a column and reshaping it is a no-op.
        if column.shape != y.shape:
            warn(
                f"Vector of labels must be a numpy column 2D array, it has been reshaped into a {column.shape} array."
            )
        y = column

    if self.solver in RidgeSketch.SKLEARN_SOLVERS:
        # Pass attributes of the RidgeSketch object to the sklearn model
        model = linear_model.Ridge(
            alpha=self.alpha,
            fit_intercept=self.fit_intercept,
            max_iter=self.max_iter,
            tol=self.tol,
            solver=self.solver,
        )
        model.fit(X, y)
        # Store the coefficients as a column vector, whatever layout
        # sklearn returned them in.
        self.coef_ = model.coef_.reshape(-1, 1)
        self.intercept_ = model.intercept_

        # Relative residual ||A w - b|| / ||b|| of the linear system
        # associated with the ridge problem.
        A = AMatrix(alpha=self.alpha, X=X)
        n_samples, n_features = X.shape
        if n_features > n_samples:
            # dual system: recover the dual coefficients from X^T w = coef_
            # NOTE(review): lstsq expects a dense array; confirm that sparse
            # X never reaches this branch.
            b = y
            w, _, _, _ = linalg.lstsq(X.T, self.coef_)  # dual coef
        else:
            # primal system
            b = safe_sparse_dot(X.T, y, dense_output=True)
            w = self.coef_
        # NOTE(review): ||b|| is zero for all-zero labels, which makes the
        # relative residual undefined (nan) — confirm whether callers care.
        residual_norm = np.linalg.norm(A @ w - b) / np.linalg.norm(b)
        self.residual_norms.append(residual_norm)
    elif self.solver in RidgeSketch.SKETCH_SOLVERS:
        # NOTE: the automatic fallback to the direct solver for small
        # problems (direct_solver_th) is currently disabled.
        if self.fit_intercept:
            X = RidgeSketch.add_col_for_intercept(X)
        self.sketch_solver_setup(X, y)
    else:
        raise NotImplementedError(f"{self.solver} solver not implemented")