in src/evaluation/wordsim.py [0:0]
def get_crosslingual_wordsim_scores(lang1, word2id1, embeddings1,
                                    lang2, word2id2, embeddings2, lower=True):
    """
    Return cross-lingual word similarity scores.

    Looks under ``SEMEVAL17_EVAL_PATH`` for ``<lang1>-<lang2>-SEMEVAL17.txt``
    and, if that file is absent, for ``<lang2>-<lang1>-SEMEVAL17.txt``.
    The first file found is handed to ``get_spearman_rho`` together with
    both vocabularies / embeddings; when the reversed file is used, the
    two languages' arguments are swapped so they follow the file's order.

    Parameters:
        lang1, lang2: language codes used to build the file name and, once
            upper-cased, the task name.
        word2id1, embeddings1: passed through to ``get_spearman_rho`` for
            ``lang1`` (presumably a word -> index mapping and the matching
            embedding matrix; confirm against ``get_spearman_rho``).
        word2id2, embeddings2: same, for ``lang2``.
        lower: forwarded unchanged to ``get_spearman_rho``.

    Returns:
        ``None`` when neither evaluation file exists; otherwise a dict with
        the single entry ``{'<LANG1>_<LANG2>_SEMEVAL17': coeff}``. A summary
        table (found / not found / rho) is also written to ``logger``.
    """
    path_fwd = os.path.join(SEMEVAL17_EVAL_PATH, '%s-%s-SEMEVAL17.txt' % (lang1, lang2))
    path_rev = os.path.join(SEMEVAL17_EVAL_PATH, '%s-%s-SEMEVAL17.txt' % (lang2, lang1))

    # Probe each path at most once, in priority order. A single if/elif/else
    # chain guarantees the result variables are always bound past this point.
    if os.path.exists(path_fwd):
        coeff, found, not_found = get_spearman_rho(
            word2id1, embeddings1, path_fwd,
            lower, word2id2, embeddings2
        )
    elif os.path.exists(path_rev):
        # The file lists lang2 first, so swap the argument order to match.
        coeff, found, not_found = get_spearman_rho(
            word2id2, embeddings2, path_rev,
            lower, word2id1, embeddings1
        )
    else:
        return None

    # Column widths below mirror the widths used in `pattern`.
    separator = "=" * (30 + 1 + 10 + 1 + 13 + 1 + 12)
    pattern = "%30s %10s %13s %12s"
    logger.info(separator)
    logger.info(pattern % ("Dataset", "Found", "Not found", "Rho"))
    logger.info(separator)

    # The task name always uses the caller's (lang1, lang2) order, even when
    # the reversed file was the one evaluated.
    task_name = '%s_%s_SEMEVAL17' % (lang1.upper(), lang2.upper())
    logger.info(pattern % (task_name, str(found), str(not_found), "%.4f" % coeff))
    logger.info(separator)

    return {task_name: coeff}