in graspologic/cluster/kclust.py [0:0]
def fit(self, X: np.ndarray, y: Optional[np.ndarray] = None) -> "KMeansCluster":
    """
    Fit one KMeans model per candidate cluster count and keep the best one.

    A separate KMeans model is fitted for every cluster count from 2 up to
    ``self.max_clusters`` (inclusive), each with ``self.random_state``.
    When ``y`` is given, the model with the highest adjusted Rand index
    against ``y`` is kept; otherwise the model with the highest silhouette
    score is kept. Ties go to the smallest cluster count.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        List of n_features-dimensional data points. Each row
        corresponds to a single data point.
    y : array-like, shape (n_samples,), optional (default=None)
        List of labels for `X` if available. Used to compute ARI scores
        and, when provided, to select the best model.

    Returns
    -------
    self
        The fitted estimator, with these attributes set:
        ``silhouette_`` (one silhouette score per candidate count),
        ``ari_`` (one ARI per candidate count, or None when ``y`` is None),
        ``n_clusters_`` (cluster count of the selected model) and
        ``model_`` (the selected fitted KMeans instance).

    Raises
    ------
    ValueError
        If ``self.max_clusters`` is greater than the number of rows in `X`.
    """
    # Deal with number of clusters: cannot ask for more clusters than samples.
    n_samples = X.shape[0]
    if self.max_clusters > n_samples:
        template = (
            "n_components must be <= n_samples, but got "
            "n_components = {}, n_samples = {}"
        )
        raise ValueError(template.format(self.max_clusters, n_samples))

    # Candidate counts start at 2, so position i in every list below
    # corresponds to cluster_counts[i] clusters, not i + 1.
    cluster_counts = list(range(2, self.max_clusters + 1))

    # Compute all models
    models = []
    silhouettes = []
    aris = []
    for n in cluster_counts:
        model = KMeans(n_clusters=n, random_state=self.random_state)
        # Fit and compute values
        predictions = model.fit_predict(X)
        models.append(model)
        silhouettes.append(silhouette_score(X, predictions))
        if y is not None:
            aris.append(adjusted_rand_score(y, predictions))

    # Supervised selection (ARI) when labels exist, otherwise unsupervised
    # selection (silhouette). np.argmax returns the first maximum.
    has_labels = y is not None
    selection_scores = aris if has_labels else silhouettes
    best = int(np.argmax(selection_scores))

    self.ari_ = aris if has_labels else None
    self.silhouette_ = silhouettes
    # Read the count from cluster_counts so it always matches model_.
    self.n_clusters_ = cluster_counts[best]
    self.model_ = models[best]
    return self