in clustering.py [0:0]
def cluster(self, data, verbose=False):
    """Run PIC clustering on ``data`` and store the groups on the instance.

    The features are preprocessed, ``make_graph`` builds a neighbour graph
    from them using ``self.nnn``, and ``run_pic`` produces one label per
    position. Positions are grouped by label and the groups are saved to
    ``self.images_lists`` as a list of index lists.

    When ``self.distribute_singletons`` is true, every group holding a
    single index is merged into the group of the first of its neighbours
    that is not itself in a single-element group. Singletons with no such
    neighbour are left as they are.

    Args:
        data: Features handed to ``preprocess_features``.
        verbose: When true, print the elapsed wall-clock time.

    Returns:
        Always ``0``; the result lives in ``self.images_lists``.
    """
    start = time.time()

    features = preprocess_features(data)
    neighbours, distances = make_graph(features, self.nnn)
    labels = run_pic(neighbours, distances, self.sigma, self.alpha)

    # Seed the groups from the set of labels so the group order is the
    # set's iteration order, then fill them in position order.
    groups = {label: [] for label in set(labels)}
    for index, label in enumerate(labels):
        groups[label].append(index)

    if self.distribute_singletons:
        # First pass: decide, against the untouched grouping, which
        # neighbour each singleton will follow.
        adopted_by = {}
        for members in groups.values():
            if len(members) != 1:
                continue
            lone = members[0]
            # Column 0 is skipped -- presumably the sample itself;
            # confirm against make_graph.
            for candidate in neighbours[lone, 1:]:
                if len(groups[labels[candidate]]) != 1:
                    adopted_by[lone] = candidate
                    break

        # Second pass: drop each resolved singleton group and move its
        # index into the neighbour's group.
        for lone, candidate in adopted_by.items():
            del groups[labels[lone]]
            labels[lone] = labels[candidate]
            groups[labels[lone]].append(lone)

    self.images_lists = list(groups.values())

    if verbose:
        print('pic time: {0:.0f} s'.format(time.time() - start))
    return 0