# bibless_setup()
#
# in hypernymysuite/evaluation.py [0:0]


def bibless_setup(model):
    """
    Combined detection with a threshold, plus direction prediction.

    Evaluates ``model`` on the bibless dataset: each pair is first detected
    as hypernymic (in either direction) by thresholding the larger of the
    forward/backward scores, then the direction is taken from whichever
    ordering scores higher. Accuracy is averaged over many random
    validation/test splits, with the threshold tuned on each validation
    split.
    """
    dataset = Dataset(os.path.join(DATA_DIR, "bibless.tsv"), model.vocab)

    # Fixed seed so repeated runs produce identical splits
    rng = np.random.RandomState(42)
    VAL_PROB = .02
    NUM_TRIALS = 1000

    # Out-of-vocabulary pairs cannot be scored, so drop them up front
    mask = dataset.invocab_mask
    labels = dataset.y[mask]

    # A pair counts as hypernymy regardless of direction (label != 0)
    is_hyper = labels != 0

    # Score both orderings of each pair; detection uses the larger score
    fwd = model.predict_many(dataset.hypos[mask], dataset.hypers[mask])
    bwd = model.predict_many(dataset.hypers[mask], dataset.hypos[mask])
    combined = np.max([fwd, bwd], axis=0)

    # Direction: +1 when the forward ordering scores at least as high, else -1
    direction = 2 * np.float32(fwd >= bwd) - 1

    val_accs = []
    test_accs = []
    for _ in range(NUM_TRIALS):
        # Draw a fresh random validation split; test is its complement
        in_val = rng.rand(len(labels)) < VAL_PROB
        in_test = ~in_val

        # Choose the detection threshold that maximizes validation accuracy
        _, _, candidates = precision_recall_curve(
            is_hyper[in_val], combined[in_val]
        )
        acc_per_thr = np.mean(
            (combined[in_val, np.newaxis] >= candidates)
            == is_hyper[in_val, np.newaxis],
            axis=0,
        )
        threshold = candidates[acc_per_thr.argmax()]

        detected_val = combined[in_val] >= threshold
        detected_test = combined[in_test] >= threshold

        # Final label: 0 when not detected, otherwise the signed direction
        final_val = detected_val * direction[in_val]
        final_test = detected_test * direction[in_test]

        val_accs.append(np.mean(final_val == labels[in_val]))
        test_accs.append(np.mean(final_test == labels[in_test]))

    # report average across many folds
    return {"acc_val_inv": np.mean(val_accs), "acc_test_inv": np.mean(test_accs)}