def generate_random_intervals()

in src/generate_matrices.py [0:0]


def generate_random_intervals(n_samples, n_subgroups, min_size, max_repititions=10000):
    """
    The following randomized algorithm partitions an array of length `n_samples` into k = `n_subgroups` contiguous
    regions by randomly placing k-1 dividing positions in the array. Every interval must have size >= `min_size`,
    or the algorithm is repeated up to `max_repititions` times. If we fail 10000 times, an exception is raised.
    :return: List of n_subgroups "sizes" of intervals in order which sum to n_sumples
    """
    if n_samples / n_subgroups < min_size:
        raise Exception(f"ERROR: Cannot subdivide {n_samples} instances into {n_subgroups} such that all subgroups "
                        f"have size at least {min_size}.")

    attempts = 0
    while attempts < max_repititions:
        # Determines the positions of the dividers randomly
        d = sorted(np.random.randint(n_samples, size=n_subgroups - 1))
        # The ith size is defined by the ith divider minus position of i-1th divider.
        # In total the inner array will give us n_samples - 2, and we add the outer and inner interval
        subgroup_sizes = [d[0]] + [(d[i] - d[i - 1]) for i in range(1, len(d))] + [n_samples - d[-1]]

        # If no subgroup is too small, return the sizes, otheriwse we will repeat
        if min(subgroup_sizes) >= min_size:
            return subgroup_sizes

    raise Exception(f'We failed to find a valid partition of {n_samples} elements into {n_subgroups} groups such that'
                    f'each groups had size >= {min_size} after {max_repititions} attempts of our randomized algorithm.'
                    f' Please lower the minimum'
                    f'threshold, increase the number of samples, or decrease the number of subgroups and try again.')