def get_crosslingual_wordsim

def get_crosslingual_wordsim_scores()

in src/evaluation/wordsim.py [0:0]

29 lines of code
6 McCabe index (conditional complexity)


def get_crosslingual_wordsim_scores(lang1, word2id1, embeddings1,
                                    lang2, word2id2, embeddings2, lower=True):
    """
    Return cross-lingual word similarity scores.

    Looks under ``SEMEVAL17_EVAL_PATH`` for an evaluation file named
    ``<lang1>-<lang2>-SEMEVAL17.txt`` or, failing that,
    ``<lang2>-<lang1>-SEMEVAL17.txt``. The first one found is handed to
    ``get_spearman_rho`` and the result is logged as a one-row table.

    Args:
        lang1, lang2: language codes used to build the file name and the
            result key.
        word2id1, embeddings1: vocabulary / embeddings for ``lang1``;
            forwarded untouched to ``get_spearman_rho``.
        word2id2, embeddings2: same, for ``lang2``.
        lower: forwarded to ``get_spearman_rho`` (presumably controls
            lowercasing of the evaluated words -- confirm in that helper).

    Returns:
        ``None`` if neither evaluation file exists; otherwise a one-entry
        dict ``{'<LANG1>_<LANG2>_SEMEVAL17': coeff}`` where ``coeff`` is the
        first value returned by ``get_spearman_rho``.
    """
    f1 = os.path.join(SEMEVAL17_EVAL_PATH, '%s-%s-SEMEVAL17.txt' % (lang1, lang2))
    f2 = os.path.join(SEMEVAL17_EVAL_PATH, '%s-%s-SEMEVAL17.txt' % (lang2, lang1))

    # Decide once which file to use. A single if/elif/else chain guarantees
    # that we either return early or have the evaluation arguments bound,
    # even if the files change on disk while this function runs.
    if os.path.exists(f1):
        rho_args = (word2id1, embeddings1, f1, lower, word2id2, embeddings2)
    elif os.path.exists(f2):
        # The file lists the languages in the opposite order, so the two
        # vocabularies / embeddings are swapped to match it.
        rho_args = (word2id2, embeddings2, f2, lower, word2id1, embeddings1)
    else:
        return None

    coeff, found, not_found = get_spearman_rho(*rho_args)

    # Separator width = sum of the column widths below plus the three spaces
    # between them.
    separator = "=" * (30 + 1 + 10 + 1 + 13 + 1 + 12)
    pattern = "%30s %10s %13s %12s"
    logger.info(separator)
    logger.info(pattern % ("Dataset", "Found", "Not found", "Rho"))
    logger.info(separator)

    # The key always uses the (lang1, lang2) order of the call, regardless of
    # which of the two files was actually read.
    task_name = '%s_%s_SEMEVAL17' % (lang1.upper(), lang2.upper())
    logger.info(pattern % (task_name, str(found), str(not_found), "%.4f" % coeff))
    logger.info(separator)

    return {task_name: coeff}