in blink/candidate_retrieval/dataset.py [0:0]
def eval(testset, system_pred, nel=False):
    """Compute precision, recall and F1 of predicted entities against gold.

    Gold and predicted entities are collected document by document and then
    compared position by position. A prediction counts as a true positive
    only when it equals the gold entity and is not the string ``"NIL"``.

    Args:
        testset: Mapping from document name to a list of mention dicts. Each
            dict must provide ``"gold"``, a sequence whose first element is
            the gold entity. When ``nel`` is true, the first dict of every
            non-empty document must also provide ``"ments_per_sent_flair"``,
            an iterable of sized items (presumably the mentions detected in
            each sentence -- confirm against the data loader).
        system_pred: Mapping from document name to a list of dicts providing
            ``"pred"``, a sequence whose first element is the predicted
            entity. It must contain every document name found in ``testset``.
        nel: If true, the precision denominator is the total number of
            discovered mentions (summed lengths of the
            ``"ments_per_sent_flair"`` items) minus the number of ``"NIL"``
            predictions. Otherwise it is the number of non-``"NIL"``
            predictions.

    Returns:
        A ``(precision, recall, f1)`` tuple. Any score whose denominator is
        zero is reported as ``0.0`` instead of raising ``ZeroDivisionError``.

    Raises:
        KeyError: If a document or a required key is missing.
    """
    gold = []
    pred = []
    for doc_name, content in testset.items():
        gold.extend(c["gold"][0] for c in content)  # the gold named entity
        pred.extend(
            c["pred"][0] for c in system_pred[doc_name]
        )  # the predicted named entity

    # zip() stops at the shorter sequence, so surplus gold or predicted
    # entries can never be counted as true positives.
    true_pos = sum(1 for g, p in zip(gold, pred) if g == p and p != "NIL")
    nil_preds = sum(1 for p in pred if p == "NIL")

    if nel:
        total_discovered_mentions = 0
        for content in testset.values():
            if not content:
                # A document without mentions contributes nothing; skipping
                # it avoids an IndexError on content[0].
                continue
            # Built-in sum: np.sum() over a generator is deprecated.
            total_discovered_mentions += sum(
                len(ment) for ment in content[0]["ments_per_sent_flair"]
            )
        predicted = total_discovered_mentions - nil_preds
    else:
        predicted = len(pred) - nil_preds

    # Guard every division so degenerate inputs yield 0.0 scores.
    precision = true_pos / predicted if predicted else 0.0
    recall = true_pos / len(gold) if gold else 0.0
    score_sum = precision + recall
    f1 = 2 * precision * recall / score_sum if score_sum else 0.0
    return precision, recall, f1