in hypernymysuite/evaluation.py [0:0]
import numpy as np
from sklearn.metrics import average_precision_score


def siege_setup(filename, model):
    """
    Computes Average Precision for a binary hypernymy dataset.

    `model` must expose a `vocab` and a `predict_many(hypos, hypers)`
    method; `Dataset` and `ap_at_k` are assumed to be defined elsewhere
    in the module.
    """
    ds = Dataset(filename, model.vocab)
    m_val = ds.val_mask
    mi_val = ds.val_inv_mask
    m_test = ds.test_mask
    mi_test = ds.test_inv_mask
    # We only need to run the model forward on in-vocabulary pairs, which
    # speeds things up.
    h_inv = model.predict_many(ds.hypos[ds.invocab_mask], ds.hypers[ds.invocab_mask])
    # Stub out predictions for the entire dataset, though,
    h = np.zeros(len(ds))
    # and fill in the model's predictions for the in-vocabulary pairs.
    h[ds.invocab_mask] = h_inv
    # OOV pairs get the lowest prediction seen on the inverse validation
    # split, essentially always predicting false.
    h[ds.oov_mask] = h[mi_val].min()
    y = ds.y
    results = {}
    # Across all relations
    results["other"] = {
        "ap_val": average_precision_score(y[m_val], h[m_val]),
        "ap_test": average_precision_score(y[m_test], h[m_test]),
        "ap100_val": ap_at_k(y[m_val], h[m_val], 100),
        "ap100_test": ap_at_k(y[m_test], h[m_test], 100),
        "ap_val_inv": average_precision_score(y[mi_val], h[mi_val]),
        "ap_test_inv": average_precision_score(y[mi_test], h[mi_test]),
        "ap100_val_inv": ap_at_k(y[mi_val], h[mi_val], 100),
        "ap100_test_inv": ap_at_k(y[mi_test], h[mi_test], 100),
    }
    # Fraction of pairs containing at least one out-of-vocabulary word.
    results["pct_oov"] = np.mean(ds.oov_mask)
    return results
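

# `ap_at_k` is not shown in this excerpt. Below is a minimal sketch of one
# common "average precision at k" definition (mean precision at the ranks of
# the positive pairs within the top-k scored candidates); the suite's actual
# helper may differ, and the name `_ap_at_k_sketch` is ours.
def _ap_at_k_sketch(y_true, y_score, k):
    """Hypothetical reference implementation of AP@k."""
    order = np.argsort(-np.asarray(y_score))[:k]
    hits = np.asarray(y_true)[order].astype(float)
    if hits.sum() == 0:
        return 0.0
    # Precision at each rank 1..k, counted only at ranks holding a positive.
    precision_at_i = np.cumsum(hits) / (np.arange(len(hits)) + 1.0)
    return float((precision_at_i * hits).sum() / hits.sum())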