def silhouette_score_manual()

in src/util/silhouette.py [0:0]


def silhouette_score_manual(X, labels):
    """
    Compute per-sample and average silhouette scores without using scikit-learn.

    For each sample, ``a`` is its average distance to the other members of its
    own cluster and ``b`` is the smallest average distance to the members of
    any other cluster; the sample's score is ``(b - a) / max(a, b)``.
    Distances are whatever the module-level ``average_distance`` helper
    computes (defined elsewhere in this file).

    Parameters:
    X : array-like of shape (n_samples, n_features)
        Feature set.
    labels : array-like of shape (n_samples,)
        Cluster labels for each point in X.

    Returns:
    tuple
        ``(silhouette_scores, mean_score)``: the list of per-sample silhouette
        scores in sample order, and their mean as computed by ``np.mean``.

    Notes:
    A sample that is the only member of its cluster is scored 0, following
    the standard silhouette definition. When fewer than two clusters are
    present the silhouette is undefined; every sample is scored 0 in that case
    instead of propagating NaN.
    """
    # Accept any array-like input (lists, tuples, ...), not only ndarrays.
    X = np.asarray(X)
    labels = np.asarray(labels)
    n_samples = X.shape[0]

    # Map every sample to the index of its cluster and gather the members of
    # each cluster once, instead of re-masking X for every sample.
    unique_labels, cluster_index = np.unique(labels, return_inverse=True)
    cluster_points = [X[cluster_index == k] for k in range(len(unique_labels))]
    has_other_clusters = len(unique_labels) > 1
    sample_indices = np.arange(n_samples)

    # Silhouette score of each point, in sample order
    silhouette_scores = []

    for i in range(n_samples):
        current_point = X[i]
        own_cluster = cluster_index[i]

        # Points in the same cluster as the current point (excluding itself)
        same_cluster_points = X[(cluster_index == own_cluster) & (sample_indices != i)]

        # Singleton cluster, or no other cluster to compare against: the
        # score is defined as 0 (avoids a spurious 1.0 or a NaN from inf/inf).
        if len(same_cluster_points) == 0 or not has_other_clusters:
            silhouette_scores.append(0.0)
            continue

        # Intra-cluster distance (mean distance to other points in the same cluster)
        a = average_distance(current_point, same_cluster_points)

        # Inter-cluster distance (mean distance to points in the nearest cluster)
        b = float('inf')
        for k, other_cluster_points in enumerate(cluster_points):
            if k == own_cluster:
                continue
            distance_to_other_cluster = average_distance(current_point, other_cluster_points)
            b = min(b, distance_to_other_cluster)

        # Guard against division by zero when all involved points coincide
        denominator = max(a, b)
        silhouette = (b - a) / denominator if denominator > 0 else 0.0

        silhouette_scores.append(silhouette)

    # Return the per-point scores together with their mean
    return silhouette_scores, np.mean(silhouette_scores)