def eval()

in blink/candidate_retrieval/dataset.py


def eval(testset, system_pred, nel=False):
    """Compute precision, recall, and F1 of the predicted entity links.

    With nel=False, precision is measured over the non-NIL predictions only
    (entity disambiguation). With nel=True, it is measured over all mentions
    discovered by the mention detector (end-to-end NEL).
    """
    gold = []
    pred = []

    for doc_name, content in testset.items():
        gold += [c["gold"][0] for c in content]  # the gold named entity
        pred += [
            c["pred"][0] for c in system_pred[doc_name]
        ]  # the predicted named entity

    true_pos = 0
    for g, p in zip(gold, pred):
        if g == p and p != "NIL":
            true_pos += 1

    if nel:
        # End-to-end NEL: the denominator is every mention discovered by the
        # mention detector (Flair), minus the mentions predicted as NIL.
        NIL_preds = len([p for p in pred if p == "NIL"])
        total_discovered_mentions = 0
        for doc_name, content in testset.items():
            # built-in sum instead of np.sum: summing a generator needs no numpy
            total_discovered_mentions += sum(
                len(ment) for ment in content[0]["ments_per_sent_flair"]
            )

        precision = true_pos / (total_discovered_mentions - NIL_preds)
    else:
        # Entity disambiguation: the denominator is the number of non-NIL predictions.
        precision = true_pos / len([p for p in pred if p != "NIL"])

    recall = true_pos / len(gold)
    f1 = 2 * precision * recall / (precision + recall)
    return precision, recall, f1
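
A minimal usage sketch follows; the testset/system_pred dictionaries are invented for illustration and only mirror the keys the function actually reads ("gold", "pred", "ments_per_sent_flair"), not the full record layout produced by the repository's preprocessing.

# Hypothetical inputs: each document maps to a list of mention records.
testset = {
    "doc_1": [
        {"gold": ["Douglas Adams"], "ments_per_sent_flair": [["Douglas Adams", "Earth"]]},
        {"gold": ["Earth"], "ments_per_sent_flair": [["Douglas Adams", "Earth"]]},
    ]
}
system_pred = {
    "doc_1": [
        {"pred": ["Douglas Adams"]},  # correctly linked mention
        {"pred": ["NIL"]},            # mention left unlinked
    ]
}

p, r, f1 = eval(testset, system_pred)                        # disambiguation-style precision
p_nel, r_nel, f1_nel = eval(testset, system_pred, nel=True)  # end-to-end NEL precision
print(f"P={p:.2f} R={r:.2f} F1={f1:.2f}")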