def silh_find_optimal_k()

in src/jobs/util/silhouette.py [0:0]


def silh_find_optimal_k(X, cluster_space):
    """Pick the cluster count with the highest mean silhouette score.

    For every usable candidate ``k`` in ``cluster_space`` a ``KMeans`` model
    (fixed ``random_state=42`` so repeated runs give the same labels) is
    fitted on ``X`` and the resulting labelling is scored with
    ``silhouette_score``. The first candidate with the strictly highest
    score wins.

    Args:
        X: Sample matrix exposing ``.shape``; ``X.shape[0]`` is used as the
            number of samples. It is passed unchanged to ``KMeans`` and
            ``silhouette_score`` (presumably scikit-learn's -- confirm
            against the file's imports).
        cluster_space: Iterable of candidate cluster counts. Order does not
            matter; unusable candidates are skipped individually.

    Returns:
        The best candidate ``k``, or ``None`` when no candidate could be
        evaluated (empty ``cluster_space`` or every ``k`` out of range).
    """
    n_samples = X.shape[0]
    best_k = None
    best_silhouette = float("-inf")

    for k in cluster_space:
        # The silhouette score needs at least 2 clusters and fewer clusters
        # than samples. Skip (rather than stop at) unusable candidates so an
        # unsorted cluster_space is still searched completely.
        if k < 2 or k >= n_samples:
            continue

        labels = KMeans(n_clusters=k, random_state=42).fit_predict(X)
        silhouette_avg = silhouette_score(X, labels)
        print(f"Number of clusters: {k}, Silhouette Score: {silhouette_avg}")

        # Strict comparison: on ties the earliest candidate is kept.
        if silhouette_avg > best_silhouette:
            best_silhouette = silhouette_avg
            best_k = k

    print(f"Best k is {best_k}")
    return best_k