in hypernymysuite/evaluation.py [0:0]
def correlation_setup(filename, model):
    """
    Computes Spearman's rho correlations between model predictions and a
    continuous gold value.

    The dataset is loaded from ``filename`` using ``model.vocab`` and the
    "score" column as labels. The model scores every (hypo, hyper) pair via
    ``model.predict_many``; pairs flagged by ``ds.oov_mask`` are then assigned
    the median prediction over the in-vocabulary training pairs.

    Args:
        filename: Location of the dataset, passed through to ``Dataset``.
        model: Object exposing ``vocab`` and ``predict_many(hypos, hypers)``.
            The prediction array is modified in place for OOV pairs, so it
            must support boolean-mask assignment (presumably a numpy array).

    Returns:
        A dict with the following keys:
            rho_train / rho_val / rho_test / rho_all: rho on each fold and on
                the whole dataset, OOV pairs included (with the median guess).
            rho_train_inv / rho_val_inv / rho_test_inv / rho_all_inv: the
                same, restricted to the in-vocabulary pairs.
            num_all: number of pairs in the dataset.
            num_oov_all: number of pairs flagged by ``ds.oov_mask``.
            pct_oov_all: mean of ``ds.oov_mask`` (fraction of OOV pairs).
    """
    ds = Dataset(filename, model.vocab, ycolumn="score")
    h = model.predict_many(ds.hypos, ds.hypers)
    # For OOV words, we should guess the median distance of all the pairs.
    # i.e. We're not committing to high or low similarity
    h[ds.oov_mask] = np.median(h[ds.train_inv_mask])
    y = ds.labels

    def rho(mask):
        # spearmanr returns (correlation, pvalue); only the correlation
        # is reported.
        return scipy.stats.spearmanr(y[mask], h[mask])[0]

    mi = ds.invocab_mask
    m_train = ds.train_mask
    mi_train = ds.train_inv_mask
    m_val = ds.val_mask
    mi_val = ds.val_inv_mask
    m_test = ds.test_mask
    # Must be the *test* in-vocab mask: reusing the validation mask here
    # made "rho_test_inv" silently duplicate "rho_val_inv".
    mi_test = ds.test_inv_mask

    return {
        "rho_train": rho(m_train),
        "rho_val": rho(m_val),
        "rho_test": rho(m_test),
        "rho_all": scipy.stats.spearmanr(y, h)[0],
        "rho_train_inv": rho(mi_train),
        "rho_val_inv": rho(mi_val),
        "rho_test_inv": rho(mi_test),
        "rho_all_inv": rho(mi),
        "num_all": len(ds),
        "num_oov_all": int(np.sum(ds.oov_mask)),
        "pct_oov_all": np.mean(ds.oov_mask),
    }