def generate_feature_matrix()

in src/generate_matrices.py [0:0]


def generate_feature_matrix(numsamples, numdims, numgroups, num_uniform_features, grouplabels, mean_range, variability):
    """
    Build a feature matrix in which every group has its own distribution for each feature.

    For each group g in range(numgroups), numdims distributions are defined (one per column) and the rows
    whose label equals g are filled with the output of generate_group_features for that group. Rows whose
    label is not in range(numgroups) are left as zeros.

    :param numsamples: Total number of samples (number of rows)
    :param numdims: total dimensionality (number of columns)
    :param numgroups: number of groups; labels are matched against 0 .. numgroups - 1
    :param num_uniform_features: how many of the distributions for each group should be uniform rather than
        normal (the last num_uniform_features columns are the ones flagged uniform)
    :param grouplabels: array-like of group labels, one per sample (list, tuple, or ndarray; a column vector
        is flattened). Ignored when the vanilla shortcut below applies.
    :param mean_range: the mean of each distribution is selected uniformly at random from [-mean_range, mean_range]
    :param variability: standard deviation for normal or distance from center to upper/lower bound on uniform
    :return: X, matrix of features of shape (numsamples, numdims) where each group has a unique distribution
        for each feature
    """

    # If we are using a vanilla dataset, just use unit normal for all features for all groups
    if mean_range == 0 and variability == 1 and num_uniform_features == 0:
        return np.random.randn(numsamples, numdims)

    # Coerce once so that list/tuple labels compare element-wise; comparing a plain list to an int yields a
    # single False, which previously made every group look empty and returned an all-zero matrix.
    labels = np.asarray(grouplabels)

    # Feature matrix to be returned once each group's rows have been filled in
    X = np.zeros((numsamples, numdims))

    # Each group has its own set of "numdims" distributions, defined by choice of normal/uniform, mean, and
    # variability. The group's rows are then populated from those distributions.
    first_uniform_col = numdims - num_uniform_features
    for g in range(numgroups):
        # One 3-tuple per column: (is_uniform, mean, variability), where variability is the std. dev. for a
        # normal or the distance from center to endpoint for a uniform. is_uniform is a bool that is True
        # for the last num_uniform_features columns and False for the rest.
        distribution_attributes = [
            (col >= first_uniform_col, np.random.uniform(-mean_range, mean_range), variability)
            for col in range(numdims)
        ]
        # Flat row indices of the current group's members. flatnonzero also handles (n, 1)-shaped labels,
        # for which np.where would return two index arrays and double the member count.
        member_rows = np.flatnonzero(labels == g)
        X[member_rows, :] = generate_group_features(distribution_attributes, member_rows.size)
    return X