in src/generate_matrices.py [0:0]
def generate_random_intervals(n_samples, n_subgroups, min_size, max_repititions=10000):
"""
The following randomized algorithm partitions an array of length `n_samples` into k = `n_subgroups` contiguous
regions by randomly placing k-1 dividing positions in the array. Every interval must have size >= `min_size`,
or the algorithm is repeated up to `max_repititions` times. If we fail 10000 times, an exception is raised.
:return: List of n_subgroups "sizes" of intervals in order which sum to n_sumples
"""
if n_samples / n_subgroups < min_size:
raise Exception(f"ERROR: Cannot subdivide {n_samples} instances into {n_subgroups} such that all subgroups "
f"have size at least {min_size}.")
attempts = 0
while attempts < max_repititions:
# Determines the positions of the dividers randomly
d = sorted(np.random.randint(n_samples, size=n_subgroups - 1))
# The ith size is defined by the ith divider minus position of i-1th divider.
# In total the inner array will give us n_samples - 2, and we add the outer and inner interval
subgroup_sizes = [d[0]] + [(d[i] - d[i - 1]) for i in range(1, len(d))] + [n_samples - d[-1]]
# If no subgroup is too small, return the sizes, otheriwse we will repeat
if min(subgroup_sizes) >= min_size:
return subgroup_sizes
raise Exception(f'We failed to find a valid partition of {n_samples} elements into {n_subgroups} groups such that'
f'each groups had size >= {min_size} after {max_repititions} attempts of our randomized algorithm.'
f' Please lower the minimum'
f'threshold, increase the number of samples, or decrease the number of subgroups and try again.')