in experiments/sample_datasets.py [0:0]
def build_corr_tables(corr_sets, corr_errs, corr_features):
    '''
    Build the lookup tables used for the distance calculation from a
    collection of corruption severity sets.  Within each corruption the
    severity sets are re-ordered by increasing mean error (the dataset
    selection algorithm relies on this monotone ordering); the applied
    ordering is returned so callers can reverse it afterwards.
    Inputs:
        corr_sets: dictionary of corruption keys with lists of severity set
            values
        corr_errs: dictionary of keys with the form '{corr}-{severity}' and
            values that are the errors on that corruption/severity pair
        corr_features: dictionary of keys with the form '{corr}-{severity}'
            and values that are the features on that corruption/severity pair
    Outputs:
        1. A list of all corruption strings, in the order they appear in the
           table.
        2. dictionary where the keys are corruption strings, and the values
           are the severity sets ordered by increasing corruption error.
        3. 2d numpy array with the shape [corruption, severity_set] that
           gives the average error on that severity set and corruption
        4. 4d numpy array with the shape
           [corruption, severity_set, severity, features]
    '''
    corrs = list(corr_sets.keys())
    # Table dimensions: every corruption is assumed to have the same number
    # of severity sets, and every set the same number of severities.
    n_sets = len(corr_sets[corrs[0]])
    n_sevs = len(corr_sets[corrs[0]][0])
    n_feats = len(next(iter(corr_features.values())))

    ordered = {}
    err_table = np.zeros((len(corrs), n_sets))
    feat_table = np.zeros((len(corrs), n_sets, n_sevs, n_feats))

    for row, corr in enumerate(corrs):
        sev_sets = corr_sets[corr]
        # Mean error per severity set, and the stacked per-severity features.
        mean_errs = np.array([
            np.mean([corr_errs["{}-{}".format(corr, s)] for s in sevs])
            for sevs in sev_sets
        ])
        feats = np.array([
            [corr_features["{}-{}".format(corr, s)] for s in sevs]
            for sevs in sev_sets
        ])
        # One permutation drives all three outputs so the rows stay aligned.
        perm = np.argsort(mean_errs)
        err_table[row] = mean_errs[perm]
        feat_table[row] = feats[perm]
        ordered[corr] = np.array(sev_sets)[perm]

    return corrs, ordered, err_table, feat_table