in experiments/sample_datasets.py [0:0]
def build_sets(corr_errs, avg_spread):
    '''
    For every severity of a corruption that has at least two lower and two
    higher severities, build a set of 5 severities centred on it whose
    relative test errors best match the requested spread.

    With e(s) the test error at severity s and m the middle severity, the
    relative offset of s is e(s) / e(m) - 1. The four other members are
    picked greedily, in this order:
      * s1: at least two positions below m, offset closest to -avg_spread
      * s2: strictly between s1 and m, offset closest to -avg_spread / 2
      * s5: at least two positions above m, offset closest to +avg_spread
      * s4: strictly between m and s5, offset closest to +avg_spread / 2
    Exact ties go to the candidate with the highest severity.

    Inputs:
        corr_errs: dictionary where each key is a string "{corr}-{severity}"
            and each value is the test error. The severity is whatever
            follows the last "-" and must parse as a float ("3" and "3.0"
            are both accepted); the corruption name may itself contain "-".
        avg_spread: float specifying the average spread to try to match
    Output:
        dictionary where each key is a string giving the corruption name
        (in sorted order), and each value is a list of 5-tuples of float
        severities (s1, s2, middle, s4, s5), one per eligible middle
        severity in increasing order. A corruption with fewer than 5
        severities maps to an empty list.
    '''
    # Keep each error next to its parsed severity. Rebuilding the dict key
    # from the float would turn "blur-3" into "blur-3.0" and miss the entry.
    by_corr = {}
    for key, err in corr_errs.items():
        # Split on the last hyphen only so hyphenated names stay intact.
        name, sev_text = key.rsplit("-", 1)
        by_corr.setdefault(name, []).append((float(sev_text), err))

    corr_sets = {}
    for name in sorted(by_corr):
        pairs = sorted(by_corr[name], key=lambda pair: pair[0])
        sevs = [sev for sev, _ in pairs]
        errs = [err for _, err in pairs]
        n_sevs = len(sevs)

        sets = []
        # The middle needs two neighbours on each side.
        for mid in range(2, n_sevs - 2):
            # Outer members are chosen first; each inner member is then
            # restricted to lie strictly between the outer one and the middle.
            s1 = _closest_offset_index(errs, mid, range(0, mid - 1), -avg_spread)
            s2 = _closest_offset_index(errs, mid, range(s1 + 1, mid), -avg_spread / 2)
            s5 = _closest_offset_index(errs, mid, range(mid + 2, n_sevs), avg_spread)
            s4 = _closest_offset_index(errs, mid, range(mid + 1, s5), avg_spread / 2)
            sets.append((sevs[s1], sevs[s2], sevs[mid], sevs[s4], sevs[s5]))
        corr_sets[name] = sets
    return corr_sets


def _closest_offset_index(errs, mid, candidates, target):
    '''
    Return the index in candidates whose relative error offset from
    errs[mid], i.e. errs[j] / errs[mid] - 1, is closest to target.
    On exact ties the last candidate wins. Returns None if no candidate
    compares as a match (empty candidates, or every offset is NaN).
    '''
    best_gap = float('inf')
    best_idx = None
    for j in candidates:
        offset = errs[j] / errs[mid] - 1
        gap = abs(target - offset)
        # "<=" (not "<") so a later candidate replaces an equally good one.
        if gap <= best_gap:
            best_gap = gap
            best_idx = j
    return best_idx