in experiments/sample_datasets.py [0:0]
def build_corr_tables(corr_sets, corr_errs, corr_features):
    '''
    Build the lookup tables used for the distance calculation from a
    collection of corruption severity sets.  Within each corruption the
    severity sets are re-ordered by increasing mean error (the dataset
    selection algorithm relies on this monotone ordering); the applied
    ordering is returned so callers can reverse it afterwards.
    Inputs:
        corr_sets: dictionary of corruption keys with lists of severity set
            values
        corr_errs: dictionary of keys with the form '{corr}-{severity}' and
            values that are the errors on that corruption/severity pair
        corr_features: dictionary of keys with the form '{corr}-{severity}'
            and values that are the features on that corruption/severity pair
    Outputs:
        1. A list of all corruption strings, in the order they appear in the
           table.
        2. dictionary where the keys are corruption strings, and the values
           are the severity sets ordered by increasing corruption error.
        3. 2d numpy array with the shape [corruption, severity_set] that
           gives the average error on that severity set and corruption
        4. 4d numpy array with the shape
           [corruption, severity_set, severity, features]
    '''
    corrs = list(corr_sets.keys())
    # Table dimensions: every corruption is assumed to have the same number
    # of severity sets, and every set the same number of severities.
    n_sets = len(corr_sets[corrs[0]])
    n_sevs = len(corr_sets[corrs[0]][0])
    n_feats = len(next(iter(corr_features.values())))

    ordered = {}
    err_table = np.zeros((len(corrs), n_sets))
    feat_table = np.zeros((len(corrs), n_sets, n_sevs, n_feats))

    for row, corr in enumerate(corrs):
        sev_sets = corr_sets[corr]
        # Mean error per severity set, and the stacked per-severity features.
        mean_errs = np.array([
            np.mean([corr_errs["{}-{}".format(corr, s)] for s in sevs])
            for sevs in sev_sets
        ])
        feats = np.array([
            [corr_features["{}-{}".format(corr, s)] for s in sevs]
            for sevs in sev_sets
        ])
        # One permutation drives all three outputs so the rows stay aligned.
        perm = np.argsort(mean_errs)
        err_table[row] = mean_errs[perm]
        feat_table[row] = feats[perm]
        ordered[corr] = np.array(sev_sets)[perm]

    return corrs, ordered, err_table, feat_table