in hypernymysuite/evaluation.py [0:0]
def bibless_setup(model):
    """
    Combined detection with a threshold, plus direction prediction.

    Loads the bibless dataset, scores every in-vocabulary pair in both
    directions with ``model``, and over many random validation/test splits:
    tunes a detection threshold on the validation fold (via candidate
    thresholds from the precision-recall curve), then combines the
    thresholded detection decision with the predicted direction and scores
    it against the signed gold label. Returns mean accuracies across folds.
    """
    ds = Dataset(os.path.join(DATA_DIR, "bibless.tsv"), model.vocab)
    # Fixed seed so repeated runs produce identical splits
    rng = np.random.RandomState(42)
    VAL_PROB = 0.02
    NUM_TRIALS = 1000

    # OOV pairs cannot be scored, so restrict everything to in-vocab items
    mask = ds.invocab_mask
    gold = ds.y[mask]
    # Detection target: hypernymy in either direction counts as positive
    is_hyper = gold != 0

    # Score each pair forward (hypo -> hyper) and reversed
    fwd = model.predict_many(ds.hypos[mask], ds.hypers[mask])
    rev = model.predict_many(ds.hypers[mask], ds.hypos[mask])
    scores = np.max([fwd, rev], axis=0)
    # +1 if the forward direction scores at least as high, else -1
    direction = 2 * np.float32(fwd >= rev) - 1

    val_accs = []
    test_accs = []
    for _ in range(NUM_TRIALS):
        # Fresh random split each trial; test is the complement of val
        in_val = rng.rand(len(gold)) < VAL_PROB
        in_test = ~in_val

        # Candidate thresholds come from the PR curve on the val fold;
        # pick the one maximizing detection accuracy there
        candidates = precision_recall_curve(is_hyper[in_val], scores[in_val])[2]
        cand_accs = np.mean(
            (scores[in_val, np.newaxis] >= candidates)
            == is_hyper[in_val, np.newaxis],
            axis=0,
        )
        threshold = candidates[cand_accs.argmax()]

        for fold, sink in ((in_val, val_accs), (in_test, test_accs)):
            detected = scores[fold] >= threshold
            # Signed prediction: 0 if below threshold, else +/-1 by direction
            signed = detected * direction[fold]
            sink.append(np.mean(signed == gold[fold]))

    # Average over all random folds
    return {"acc_val_inv": np.mean(val_accs), "acc_test_inv": np.mean(test_accs)}