def siege_setup()

in hypernymysuite/evaluation.py


import numpy as np
from sklearn.metrics import average_precision_score

# Dataset and ap_at_k are defined or imported elsewhere in this module.


def siege_setup(filename, model):
    """
    Computes average precision (AP and AP@100) for a binary dataset,
    on both the validation and test splits.
    """
    ds = Dataset(filename, model.vocab)

    # Boolean masks for the validation and test splits; the *_inv
    # variants appear to restrict each split to in-vocabulary pairs.
    m_val = ds.val_mask
    mi_val = ds.val_inv_mask
    m_test = ds.test_mask
    mi_test = ds.test_inv_mask

    # We only need to run the model forward on in-vocab pairs, which
    # speeds things up.
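    # model.predict_many is assumed to return one hypernymy score per
    # (hyponym, hypernym) pair, higher meaning more hypernym-like.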
    h_inv = model.predict_many(ds.hypos[ds.invocab_mask], ds.hypers[ds.invocab_mask])

    # Stub out scores for the entire dataset,
    h = np.zeros(len(ds))
    # and fill in our predictions for the in-vocab pairs.
    h[ds.invocab_mask] = h_inv
    # OOV pairs get our lowest validation prediction, which amounts to
    # essentially always predicting false on them.
    h[ds.oov_mask] = h[mi_val].min()
    y = ds.y
    results = {}

    # Across all relations
    results["other"] = {
        "ap_val": average_precision_score(y[m_val], h[m_val]),
        "ap_test": average_precision_score(y[m_test], h[m_test]),
        "ap100_val": ap_at_k(y[m_val], h[m_val], 100),
        "ap100_test": ap_at_k(y[m_test], h[m_test], 100),
        "ap_val_inv": average_precision_score(y[mi_val], h[mi_val]),
        "ap_test_inv": average_precision_score(y[mi_test], h[mi_test]),
        "ap100_val_inv": ap_at_k(y[mi_val], h[mi_val], 100),
        "ap100_test_inv": ap_at_k(y[mi_test], h[mi_test], 100),
    }
    results["pct_oov"] = np.mean(ds.oov_mask)
    return results
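
ap_at_k is defined alongside this function; a minimal sketch of one
plausible definition, assuming numpy arrays and that it means average
precision computed over only the k highest-scoring predictions (the
real helper may differ):

def ap_at_k(y_true, y_score, k):
    # Hypothetical sketch, reusing the imports above. Rank examples
    # from highest to lowest score and keep only the top k.
    top = np.argsort(y_score)[::-1][:k]
    return average_precision_score(y_true[top], y_score[top])

A hypothetical call, assuming a model object exposing vocab and a
predict_many(hypos, hypers) method as the body requires (the class
name and file path below are made up for illustration):

model = MyHypernymModel()
results = siege_setup("data/siege_pairs.tsv", model)
print(results["other"]["ap_test"], results["pct_oov"])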