def find_optimal_threshold()

in cost-based-ml/cost_based_ml.py [0:0]


def find_optimal_threshold(score_n_true_label, costs):
	"""Try every score as a threshold and return the one with the lowest cost.

	For the pair at position ``i``, all pairs before it are counted as
	predicted class 0 and the pairs from ``i`` onwards as predicted class 1.
	The resulting confusion-matrix counts are handed to ``apply_costs``
	together with ``costs``.

	NOTE(review): this counting only matches "predict class 1 when
	score >= threshold" if the pairs arrive sorted by ascending score; the
	function neither sorts nor checks this -- confirm against the caller.

	Args:
		score_n_true_label: iterable of ``(score, true_label)`` pairs where
			every label is 0 or 1.
		costs: forwarded unchanged as the first argument of ``apply_costs``.

	Returns:
		A tuple ``(best_threshold, lowest_cost, threshold_costs)``.
		``threshold_costs`` is the list of ``(score, cost)`` pairs in input
		order. When several scores share the lowest cost, the first one
		encountered is returned.

	Raises:
		ValueError: if the input is empty or a label is neither 0 nor 1.
	"""
	# Materialise once: the data is walked twice (count, then scan), which
	# would silently exhaust a generator argument.
	pairs = list(score_n_true_label)
	if not pairs:
		raise ValueError(
			"score_n_true_label must contain at least one (score, label) pair")

	# Count labels directly instead of zip(*pairs)[1] + np.bincount: zip is
	# not subscriptable on Python 3, and bincount yields a single bin when
	# only class 0 is present, which broke the two-value unpacking.
	class_0_count = 0
	class_1_count = 0
	for _, true_label in pairs:
		if true_label == 0:
			class_0_count += 1
		elif true_label == 1:
			class_1_count += 1
		else:
			raise ValueError(
				"labels must be 0 or 1, got %r" % (true_label,))

	# Running totals of each class among the pairs already passed, i.e. the
	# ones that the current threshold classifies as class 0.
	sum_class_0 = 0
	sum_class_1 = 0
	lowest_cost = sys.float_info.max
	best_threshold = 0.0
	threshold_costs = []
	for score, true_label in pairs:
		true_neg = sum_class_0
		true_pos = class_1_count - sum_class_1
		false_neg = sum_class_1
		false_pos = class_0_count - sum_class_0
		threshold_cost = apply_costs(
			costs, true_neg, true_pos, false_neg, false_pos)
		threshold_costs.append((score, threshold_cost))
		# Strict '<' keeps the earliest score among equally cheap thresholds.
		if threshold_cost < lowest_cost:
			best_threshold = score
			lowest_cost = threshold_cost
		# The current pair moves to the "predicted class 0" side only after
		# its own threshold has been evaluated.
		if true_label == 0:
			sum_class_0 += 1
		else:
			sum_class_1 += 1
	return best_threshold, lowest_cost, threshold_costs