in cost-based-ml/cost_based_ml.py [0:0]
def find_optimal_threshold(score_n_true_label, costs):
    """Try every score as a decision threshold and return the cheapest one.

    For the pair at position ``i`` the confusion-matrix counts are built as
    if every pair before it were predicted negative and the pair itself plus
    every pair after it were predicted positive. Those counts are priced by
    ``apply_costs`` and the score with the lowest cost wins; on a cost tie
    the earliest score in input order is kept.

    NOTE(review): the running counts only describe "score >= threshold is
    positive" if the pairs arrive sorted by ascending score -- presumably
    the caller sorts them; confirm.

    Args:
        score_n_true_label: Iterable of ``(score, true_label)`` pairs.
            Labels must be the integers 0 or 1: ``np.bincount`` rejects
            negative or non-integer values, and a label above 1 produces
            more than two bins, which fails the two-way unpacking.
        costs: Cost specification handed unchanged to ``apply_costs``.

    Returns:
        A tuple ``(best_threshold, lowest_cost, threshold_costs)`` where
        ``threshold_costs`` is a list of ``(score, cost)`` tuples, one per
        input pair in input order. For empty input no threshold is
        evaluated and ``(0.0, sys.float_info.max, [])`` is returned.
    """
    # Materialise once: the data is traversed twice (label counting, then
    # the scan), which would silently exhaust a one-shot iterator.
    pairs = list(score_n_true_label)
    if not pairs:
        return 0.0, sys.float_info.max, []

    # A comprehension instead of ``zip(*pairs)[1]``: zip objects are not
    # subscriptable on Python 3.
    just_labels = [true_label for _, true_label in pairs]
    # minlength=2 keeps the unpacking valid when only class 0 is present.
    class_0_count, class_1_count = np.bincount(just_labels, minlength=2)

    sum_class_0 = 0  # class-0 pairs seen before the current position
    sum_class_1 = 0  # class-1 pairs seen before the current position
    lowest_cost = sys.float_info.max
    best_threshold = 0.0
    threshold_costs = []

    for score, true_label in pairs:
        # Pairs already passed are treated as predicted negative, the rest
        # (including the current one) as predicted positive.
        true_neg = sum_class_0
        true_pos = class_1_count - sum_class_1
        false_neg = sum_class_1
        false_pos = class_0_count - sum_class_0

        threshold_cost = apply_costs(
            costs, true_neg, true_pos, false_neg, false_pos
        )
        threshold_costs.append((score, threshold_cost))

        # Strict comparison: the first score reaching the minimum is kept.
        if threshold_cost < lowest_cost:
            best_threshold = score
            lowest_cost = threshold_cost

        # Only now does the current pair move to the "before" side.
        if true_label == 0:
            sum_class_0 += 1
        else:
            sum_class_1 += 1

    return best_threshold, lowest_cost, threshold_costs