in bindings/python-compute/performance_comparison.py [0:0]
def compute_clusters(self):
    """Cluster the rows of ``self.metric_df`` and order the clusters by median.

    Fits a ``KMeans`` model with ``self.n_clusters`` clusters on the
    ``self.clustering_columns`` columns of ``self.metric_df``. The raw
    cluster labels are then renumbered so that label 0 is the cluster with
    the lowest per-cluster median of the ``model_0`` column, label 1 the
    next lowest, and so on. ``model_0`` is looked up after every column
    listed in ``self.ipd`` has been renamed to ``'model_' + <second
    underscore-separated token of the column name>``.

    Side effects:
        self.clustering_model: the fitted ``KMeans`` estimator. It keeps
            KMeans' own label numbering; only the dataframe column below
            is renumbered.
        self.metric_df['clusters']: renumbered cluster id of every row.
        self.segment_ids: the same ``clusters`` column.

    Returns:
        None.
    """
    # Alternative model (disabled):
    # self.clustering_model = AgglomerativeClustering(
    #     n_clusters=self.n_clusters, affinity='euclidean', linkage='ward', compute_full_tree=True)

    # `precompute_distances` is intentionally not passed: scikit-learn
    # deprecated it in 0.23 and removed it in 1.0, where passing it raises
    # TypeError. `n_init=10` pins the long-standing default so results do not
    # shift on releases whose default is 'auto'.
    self.clustering_model = KMeans(n_clusters=self.n_clusters, n_init=10, random_state=0)
    raw_labels = self.clustering_model.fit_predict(self.metric_df[self.clustering_columns])
    self.metric_df['clusters'] = raw_labels

    # Per-cluster medians; the string alias avoids the pandas FutureWarning
    # emitted for `aggregate(np.median)` while computing the same values.
    rename_map = {col: 'model_' + col.split('_')[1] for col in self.ipd}
    median_df = self.metric_df.groupby('clusters').aggregate('median').rename(columns=rename_map)

    # Sort clusters by model_0 median (ascending); a raw label's position in
    # that order becomes its new cluster id.
    ordered_labels = median_df['model_0'].sort_values(ascending=True).index
    cluster_id_map = {old_id: new_id for new_id, old_id in enumerate(ordered_labels)}
    self.metric_df['clusters'] = self.metric_df['clusters'].map(cluster_id_map)
    self.segment_ids = self.metric_df['clusters']