in biolm/utils_classification.py [0:0]
def hoc_get_p_r_f_arrary(preds, labels, examples, threshold=0.5, cat=10):
    """Compute document-level, example-based P/R/F1/accuracy for multi-label output.

    Adapted from BLUE benchmark: https://github.com/ncbi-nlp/BLUE_Benchmark/blob/b6216f2cb9bba209ee7028fc874123d8fd5a810c/blue/eval_hoc.py

    Rows of ``preds``/``labels`` are grouped by a document id parsed from
    ``example.guid``; a category is on for a document if it is on in any of
    that document's rows. Per-document precision, recall and accuracy
    (|gold & pred| / |gold | pred|) are then averaged over documents, and F1
    is derived from the averaged precision and recall.

    Args:
        preds: Iterable of per-row score sequences, indexable over ``cat``
            categories; a category is predicted when its score is strictly
            greater than ``threshold``.
        labels: Iterable of per-row gold sequences; a category is gold when
            its entry equals 1.
        examples: Iterable of objects with a ``guid`` string; the document id
            is the text after the first ``-`` up to the next ``_``.
        threshold: Score cut-off for a positive prediction.
        cat: Number of categories per row.

    Returns:
        Dict with keys ``'p'``, ``'r'``, ``'f'`` and ``'acc'``. With no input
        rows the means are NaN (``np.mean`` of an empty list).
    """
    test_predict_label = {}
    test_true_label = {}
    for pred, label, example in zip(preds, labels, examples):
        doc_id = example.guid.split('-')[1].split('_')[0]
        # setdefault stores the list on first sight, so in-place updates stick.
        ttl = test_true_label.setdefault(doc_id, [0] * cat)
        tpl = test_predict_label.setdefault(doc_id, [0] * cat)
        for ind in range(cat):
            if pred[ind] > threshold:
                tpl[ind] = 1
            if label[ind] == 1:
                ttl[ind] = 1

    acc_list = []
    prc_list = []
    rec_list = []
    for doc_id, gold_flags in test_true_label.items():
        pred_flags = test_predict_label[doc_id]
        label_pred_set = {j for j in range(cat) if pred_flags[j] == 1}
        label_gold_set = {j for j in range(cat) if gold_flags[j] == 1}
        tt = len(label_gold_set & label_pred_set)
        uni_size = len(label_gold_set | label_pred_set)

        # An empty denominator scores 0 instead of raising ZeroDivisionError,
        # matching the pre-existing guard on precision.
        prc = tt / len(label_pred_set) if label_pred_set else 0
        rec = tt / len(label_gold_set) if label_gold_set else 0
        acc = tt / uni_size if uni_size else 0

        acc_list.append(acc)
        prc_list.append(prc)
        rec_list.append(rec)

    mean_prc = np.mean(prc_list)
    mean_rec = np.mean(rec_list)

    def divide(x, y):
        # Builtin float replaces the np.float alias removed in NumPy 1.24.
        return np.true_divide(x, y, out=np.zeros_like(x, dtype=float), where=y != 0)

    f_score = divide(2 * mean_prc * mean_rec, (mean_prc + mean_rec))
    return {'p': mean_prc, 'r': mean_rec, 'f': f_score, 'acc': np.mean(acc_list)}