in src/util/silhouette.py [0:0]
def silhouette_score_manual(X, labels):
    """
    Compute per-sample and average silhouette scores without scikit-learn.

    For each sample, ``a`` is the mean distance to the other members of its
    own cluster and ``b`` is the smallest mean distance to the members of any
    other cluster; the sample's score is ``(b - a) / max(a, b)``. Distances
    are delegated to the module's ``average_distance`` helper.

    Parameters:
    X : array-like of shape (n_samples, n_features)
        Feature set.
    labels : array-like of shape (n_samples,)
        Cluster labels for each point in X.

    Returns:
    tuple (list, float)
        The per-sample silhouette scores, in the order of the rows of X,
        followed by their mean.

    Notes:
    A sample that is the only member of its cluster scores 0, following the
    standard silhouette convention. When fewer than two distinct labels are
    present there is no "nearest other cluster", so every sample scores 0
    rather than NaN.
    """
    # Accept plain lists as well as arrays: the boolean masks below rely on
    # element-wise comparison, which a Python list does not provide.
    X = np.asarray(X)
    labels = np.asarray(labels)

    n_samples = X.shape[0]
    sample_indices = np.arange(n_samples)

    # Slice out each cluster's points once instead of once per sample.
    clusters = [(label, X[labels == label]) for label in np.unique(labels)]

    silhouette_scores = []
    for i in range(n_samples):
        current_point = X[i]
        current_label = labels[i]

        # Members of the sample's own cluster, excluding the sample itself.
        same_cluster_points = X[
            (labels == current_label) & (sample_indices != i)
        ]

        # Singleton cluster, or no other cluster to compare against: the
        # silhouette is defined as (or defaults to) a neutral 0.
        if len(same_cluster_points) == 0 or len(clusters) < 2:
            silhouette_scores.append(0.0)
            continue

        # Intra-cluster distance: mean distance to the rest of the cluster.
        a = average_distance(current_point, same_cluster_points)

        # Inter-cluster distance: mean distance to the nearest other cluster.
        b = min(
            average_distance(current_point, points)
            for label, points in clusters
            if label != current_label
        )

        # Guard against 0/0 when the sample coincides with every neighbour.
        denominator = max(a, b)
        silhouette = (b - a) / denominator if denominator > 0 else 0.0
        silhouette_scores.append(silhouette)

    return silhouette_scores, np.mean(silhouette_scores)