in fairmotion/tasks/clustering/clustering.py [0:0]
def main(args):
    """Cluster features read from a file and write the ranked clusters.

    Each non-empty line of ``args.features`` is parsed as
    ``<name>:<v1>\\t<v2>...``: the text before the first ``:`` is the sample
    name and the text after the last ``:`` is a tab-separated list of floats.
    Every output line has the form ``<cluster>,<rank>,<score>:<name>``.

    Args:
        args: Namespace-like object providing ``features`` (input path),
            ``clip_features`` (a percentile; only values strictly between
            0 and 100 enable clipping), ``normalize_features`` (truthy to
            normalize), ``type`` (``"kmeans"``, ``"hierarchical"``,
            ``"optics"`` or ``"dbscan"``) and ``output_file`` (output path).
            It is also forwarded unchanged to the selected clustering
            routine.

    Raises:
        ValueError: If ``args.type`` is not a supported clustering type.
    """
    # Fail fast on a bad clustering type instead of hitting an
    # UnboundLocalError on ``clusters`` after all the parsing work is done.
    supported_types = ("kmeans", "hierarchical", "optics", "dbscan")
    if args.type not in supported_types:
        raise ValueError(
            "Unsupported clustering type {!r}; expected one of: {}".format(
                args.type, ", ".join(supported_types)
            )
        )

    features = []
    names = []
    with open(args.features) as f:
        for line in f:
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline) carry no sample and
                # would otherwise crash on float("").
                continue
            fields = line.split(":")
            names.append(fields[0])
            features.append([float(x) for x in fields[-1].split("\t")])

    if features and 0.0 < args.clip_features < 100.0:
        # np.percentile only *returns* the per-column thresholds; it never
        # clips its input, so the result has to be applied explicitly.
        # NOTE(review): this caps values at the given percentile (upper
        # bound only) -- confirm that matches the intended meaning of
        # ``clip_features``.
        upper_bounds = np.percentile(features, args.clip_features, axis=0)
        # Convert back to nested lists so downstream code receives the same
        # container type as when clipping is disabled.
        features = np.minimum(features, upper_bounds).tolist()

    if args.normalize_features:
        features = normalize_features(features)

    if args.type == "kmeans":
        clusters = run_kmeans_clustering(features, names, args)
    elif args.type == "hierarchical":
        clusters = run_hierarchical_clustering(features, names, args)
    elif args.type == "optics":
        clusters = run_optics_clustering(features, names, args)
    else:  # "dbscan" -- the only remaining value after validation above.
        clusters = run_dbscan_clustering(features, names, args)

    ranked_clusters = get_ranked_clusters(clusters)
    with open(args.output_file, "w") as f:
        for cluster in ranked_clusters:
            for (name, rank, score) in ranked_clusters[cluster]:
                f.write(
                    ",".join([str(cluster), str(rank), str(score)])
                    + ":"
                    + str(name)
                    + "\n"
                )