in src/evaluation/wordsim.py [0:0]
def get_spearman_rho(word2id1, embeddings1, path, lower,
                     word2id2=None, embeddings2=None):
    """
    Score embeddings on a word similarity benchmark with Spearman's rho.

    The word pairs and their gold similarity values are loaded from `path`
    through `get_word_pairs`. Each pair is scored with the cosine similarity
    between the embedding of its first word (looked up in `word2id1` /
    `embeddings1`) and the embedding of its second word (looked up in
    `word2id2` / `embeddings2`).

    When `word2id2` and `embeddings2` are both omitted, the first vocabulary
    and embedding matrix are used on both sides (monolingual setting).
    Supplying both gives the cross-lingual setting; supplying only one of
    them is rejected by an assertion.

    Parameters:
        word2id1: mapping whose length must equal `embeddings1.shape[0]`;
            handed to `get_word_id` to resolve the first word of each pair.
        embeddings1: array indexed by the ids resolved from `word2id1`.
        path: location of the word pair file, forwarded to `get_word_pairs`.
        lower: must be exactly a `bool`; forwarded to `get_word_id`
            (presumably toggles lowercase matching -- confirm in that helper).
        word2id2: optional mapping for the second word of each pair.
        embeddings2: optional array paired with `word2id2`.

    Returns:
        A 3-tuple `(rho, n_scored, n_missing)`: the Spearman correlation
        between the gold similarities and the cosine scores, the number of
        pairs that were scored, and the number of pairs skipped because
        `get_word_id` returned None for at least one of the two words.
    """
    # The second vocabulary and its embeddings come as a pair: both or neither.
    assert not ((word2id2 is None) ^ (embeddings2 is None))
    if word2id2 is None:
        word2id2 = word2id1
    if embeddings2 is None:
        embeddings2 = embeddings1

    # Every vocabulary entry must have exactly one embedding row.
    assert len(word2id1) == embeddings1.shape[0]
    assert len(word2id2) == embeddings2.shape[0]
    assert type(lower) is bool

    human_scores = []
    cosine_scores = []
    missing = 0

    for first, second, human_score in get_word_pairs(path):
        first_id = get_word_id(first, word2id1, lower)
        second_id = get_word_id(second, word2id2, lower)

        if first_id is not None and second_id is not None:
            vec_a = embeddings1[first_id]
            vec_b = embeddings2[second_id]
            # Cosine similarity: dot product over the product of the norms.
            numerator = vec_a.dot(vec_b)
            denominator = np.linalg.norm(vec_a) * np.linalg.norm(vec_b)
            human_scores.append(human_score)
            cosine_scores.append(numerator / denominator)
        else:
            # At least one word is out of vocabulary: skip the pair.
            missing += 1

    rho = spearmanr(human_scores, cosine_scores).correlation
    return rho, len(human_scores), missing