in src/jobs/util/silhouette.py [0:0]
def silh_find_optimal_k(X, cluster_space):
    """Return the candidate cluster count with the highest silhouette score.

    For every usable ``k`` in ``cluster_space`` the data is clustered with
    ``KMeans(n_clusters=k, random_state=42)`` and the resulting labels are
    scored with ``silhouette_score``. Progress is printed to stdout.

    Args:
        X: Sample matrix. It must expose ``.shape`` (``shape[0]`` is used as
            the number of samples) and be accepted by ``KMeans.fit_predict``
            and ``silhouette_score``.
        cluster_space: Iterable of candidate cluster counts. Iteration stops
            at the first candidate that is not smaller than the number of
            samples, so candidates are expected in ascending order.

    Returns:
        The candidate with the strictly highest score (the earliest one wins
        ties), or ``None`` when no candidate could be evaluated.
    """
    best_k = None
    # Silhouette scores lie in [-1, 1]; -1 serves as the "nothing seen" floor.
    best_silhouette = -1
    for k in cluster_space:
        print("k is ")
        print(k)
        if k >= X.shape[0]:
            # The silhouette score needs fewer clusters than samples. Stopping
            # (instead of skipping) relies on ascending candidates.
            break
        if k < 2:
            # The silhouette score is undefined for fewer than two clusters
            # and scoring such a labelling raises; skip the candidate rather
            # than abort the whole search.
            continue
        print(X.shape)
        # Fixed seed keeps the clustering reproducible between runs.
        kmeans = KMeans(n_clusters=k, random_state=42)
        labels = kmeans.fit_predict(X)
        silhouette_avg = silhouette_score(X, labels)
        print(f"Number of clusters: {k}, Silhouette Score: {silhouette_avg}")
        if silhouette_avg > best_silhouette:
            best_silhouette = silhouette_avg
            best_k = k
    print(f"Best k is {best_k}")
    return best_k