def get_spearman

def get_spearman_rho()

in src/evaluation/wordsim.py [0:0]

24 lines of code
6 McCabe index (conditional complexity)


def get_spearman_rho(word2id1, embeddings1, path, lower,
                     word2id2=None, embeddings2=None):
    """
    Compute monolingual or cross-lingual word similarity score.
    """
    assert not ((word2id2 is None) ^ (embeddings2 is None))
    word2id2 = word2id1 if word2id2 is None else word2id2
    embeddings2 = embeddings1 if embeddings2 is None else embeddings2
    assert len(word2id1) == embeddings1.shape[0]
    assert len(word2id2) == embeddings2.shape[0]
    assert type(lower) is bool
    word_pairs = get_word_pairs(path)
    not_found = 0
    pred = []
    gold = []
    for word1, word2, similarity in word_pairs:
        id1 = get_word_id(word1, word2id1, lower)
        id2 = get_word_id(word2, word2id2, lower)
        if id1 is None or id2 is None:
            not_found += 1
            continue
        u = embeddings1[id1]
        v = embeddings2[id2]
        score = u.dot(v) / (np.linalg.norm(u) * np.linalg.norm(v))
        gold.append(similarity)
        pred.append(score)
    return spearmanr(gold, pred).correlation, len(gold), not_found