def correlation_setup()

in hypernymysuite/evaluation.py [0:0]


def correlation_setup(filename, model):
    """
    Computes a spearman's rho correlation between model and continuous value.

    The dataset is loaded from ``filename`` using ``model.vocab`` and the
    ``"score"`` column as the target. Model predictions for out-of-vocabulary
    pairs are replaced by the median prediction over the in-vocabulary
    training pairs before any correlation is computed.

    Args:
        filename: Path of the dataset file, passed straight to ``Dataset``.
        model: Object providing ``vocab`` and
            ``predict_many(hypos, hypers)``; the predictions must support
            boolean-mask indexing and assignment.

    Returns:
        A dict with:
          * ``rho_train`` / ``rho_val`` / ``rho_test`` / ``rho_all``:
            Spearman's rho between labels and predictions on each split
            (and on the full dataset).
          * ``rho_train_inv`` / ``rho_val_inv`` / ``rho_test_inv`` /
            ``rho_all_inv``: the same, restricted by the dataset's
            ``*_inv_mask`` / ``invocab_mask`` masks.
          * ``num_all``: ``len(ds)``.
          * ``num_oov_all``: number of entries flagged by ``ds.oov_mask``.
          * ``pct_oov_all``: mean of ``ds.oov_mask`` (fraction flagged).
    """
    ds = Dataset(filename, model.vocab, ycolumn="score")

    h = model.predict_many(ds.hypos, ds.hypers)
    # For OOV words, we should guess the median distance of all the pairs.
    # i.e. We're not committing to high or low similarity
    h[ds.oov_mask] = np.median(h[ds.train_inv_mask])

    y = ds.labels

    def rho(mask=None):
        # Spearman's rho between labels and predictions, optionally
        # restricted to the rows selected by ``mask``.
        if mask is None:
            return scipy.stats.spearmanr(y, h)[0]
        return scipy.stats.spearmanr(y[mask], h[mask])[0]

    # NOTE(review): "rho_test_inv" must use the *test* in-vocabulary mask;
    # reusing the validation mask would just duplicate "rho_val_inv". This
    # assumes Dataset exposes ``test_inv_mask`` alongside ``train_inv_mask``
    # and ``val_inv_mask`` -- confirm against the Dataset class.
    return {
        "rho_train": rho(ds.train_mask),
        "rho_val": rho(ds.val_mask),
        "rho_test": rho(ds.test_mask),
        "rho_all": rho(),
        "rho_train_inv": rho(ds.train_inv_mask),
        "rho_val_inv": rho(ds.val_inv_mask),
        "rho_test_inv": rho(ds.test_inv_mask),
        "rho_all_inv": rho(ds.invocab_mask),
        "num_all": len(ds),
        "num_oov_all": int(np.sum(ds.oov_mask)),
        "pct_oov_all": np.mean(ds.oov_mask),
    }