in backup/analysis.py [0:0]
def semantic_cluster(data: Data, n_clusters: int = 5):
    """
    Cluster utterance vectors in semantic space and measure how much each
    conversation moves between clusters.

    Every utterance vector from every dialogue is clustered together with
    k-means. Each dialogue is then walked in order: whenever two consecutive
    utterances fall in different clusters, a "flip" is counted and the cosine
    similarity between the two cluster centres is added to that dialogue's
    running total. The mean and standard deviation of both per-dialogue
    quantities are printed.

    :param data: object exposing ``dial_vecs``, an iterable of dialogues, each
        an iterable of vectors that provide a ``.numpy()`` method.
    :param n_clusters: number of k-means clusters (defaults to 5).
    :return: None; the statistics are written to stdout. If no dialogue
        qualifies, ``np.mean``/``np.std`` of an empty list yield ``nan``.
    """
    # Flatten all dialogues into one matrix, remembering how many utterances
    # each dialogue contributed so its slice can be recovered after clustering.
    tokens = []
    dial_lengths = []
    for dial in data.dial_vecs:
        vectors = [d.numpy() for d in dial]
        tokens.extend(vectors)
        dial_lengths.append(len(vectors))
    tokens = np.array(tokens)

    kmeans_clustering = KMeans(n_clusters=n_clusters)
    assignments = kmeans_clustering.fit_predict(tokens)
    centers = kmeans_clustering.cluster_centers_

    # NOTE(review): assumes ``cosine_similarity`` is scikit-learn's pairwise
    # function, which expects 2-D (n_samples, n_features) inputs and returns a
    # matrix -- confirm against this file's imports. Each centre is passed as
    # a single row and the lone entry is unwrapped to a plain float.
    # NOTE(review): the table holds similarities even though it is used as a
    # "distance"; confirm whether 1 - similarity was intended.
    cos_distances = {}
    for src, dst in it.permutations(range(n_clusters), 2):
        similarity = cosine_similarity(
            centers[src].reshape(1, -1),
            centers[dst].reshape(1, -1),
        )
        cos_distances[(src, dst)] = np.asarray(similarity).item()

    dist_traversed = []
    flips = []
    start = 0
    for length in dial_lengths:
        dial_clusters = [int(c) for c in assignments[start:start + length]]
        start += length

        dt = 0
        fp = 0
        for prev_c, cur_c in zip(dial_clusters, dial_clusters[1:]):
            if prev_c != cur_c:
                dt += cos_distances[(prev_c, cur_c)]
                fp += 1

        # Recording happens once per dialogue, right after its own walk, so
        # the final dialogue is included as well. Dialogues whose total is
        # not positive (e.g. no cluster change at all) are left out.
        if dt > 0:
            dist_traversed.append(dt)
            flips.append(fp)

    print("Flips : Mean : {}, STD: {}".format(np.mean(flips), np.std(flips)))
    print(
        "Distance traversed : Mean : {}, STD : {}".format(
            np.mean(dist_traversed), np.std(dist_traversed)
        )
    )