in rules/predicate.py [0:0]
def equi_freq(values: Set, num_bins: int) -> Set:
    """Return the upper cutoffs of equal-frequency bins over ``values``.

    The values are sorted and split into ``num_bins`` consecutive bins of
    ``len(values) // num_bins`` elements each; the largest element of every
    bin is collected as a cutoff.

    Args:
        values: Collection of mutually comparable, hashable values to bin.
        num_bins: Requested number of bins. When it exceeds the number of
            values, the square-root choice ``ceil(sqrt(len(values)))`` is
            used instead.

    Returns:
        A set with the last (largest) value of each bin. The set is empty
        when ``values`` is empty or ``num_bins`` is less than 1.

    Note:
        When ``len(values)`` is not a multiple of the bin count, the
        trailing remainder values fall outside every bin, so the largest
        cutoff can be smaller than ``max(values)``.
    """
    ordered = sorted(values)
    total = len(ordered)

    # No data or no bins requested: there is nothing to cut. This also
    # prevents a division by zero below (ceil(sqrt(0)) == 0).
    if total == 0 or num_bins < 1:
        return set()

    # More bins requested than values available: use the square-root choice.
    if num_bins > total:
        num_bins = math.ceil(math.sqrt(total))

    # Here 1 <= num_bins <= total, so every bin holds at least one value and
    # num_bins * bin_size never exceeds total.
    bin_size = total // num_bins

    # Only the last element of each bin matters, so index it directly
    # instead of materialising the bin contents.
    return {ordered[(i + 1) * bin_size - 1] for i in range(num_bins)}