in benchmarking/blackbox_repository/conversion_scripts/scripts/nasbench201_import.py [0:0]
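# Note: this excerpt assumes the module-level imports and constants of this file
# (numpy as np, pandas as pd, CONFIG_KEYS, str_to_list, search_space, RESOURCE_ATTR,
# METRIC_VALID_ERROR, METRIC_TIME_THIS_RESOURCE, BlackboxTabular) are defined or
# imported elsewhere in nasbench201_import.py.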
def convert_dataset(data, dataset):
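    """
    Convert the raw NAS-Bench-201 results for one dataset into a ``BlackboxTabular``
    with per-epoch learning curves for three seeds and six objectives.
    """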
    hp_cols = list(CONFIG_KEYS)
    hps = dict()
    for h in hp_cols:
        hps[h] = []
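    # Parse each architecture string into its list of operations, one entry per key in CONFIG_KEYS.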
    n_hps = data['total_archs']
    for i in range(n_hps):
        config = str_to_list(data['arch2infos'][i]['200']['arch_str'])
        for j, hp in enumerate(config):
            hps[CONFIG_KEYS[j]].append(hp)
    hyperparameters = pd.DataFrame(
        data=hps,
        columns=hp_cols
    )
    objective_names = [
        'valid_error',
        'train_error',
        'runtime',
        'latency',
        'flops',
        'params',
    ]
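    # Fidelities are training epochs 1, ..., 200; each architecture was evaluated with
    # up to three seeds (777, 888, 999).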
    fidelity_values = np.arange(1, 201)
    n_fidelities = len(fidelity_values)
    n_objectives = len(objective_names)
    n_seeds = 3
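    # All objectives are stored in a single tensor of shape
    # (n_hps, n_seeds, n_fidelities, n_objectives).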
    objective_evaluations = np.empty((n_hps, n_seeds, n_fidelities, n_objectives), dtype='float32')
    name_index = {name: i for i, name in enumerate(objective_names)}
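
    # Helper writing the values of one objective into its slice of the shared tensor.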
    def save_objective_values_helper(name, values):
        assert values.shape == (n_hps, n_seeds, n_fidelities)
        objective_evaluations[..., name_index[name]] = values
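
    # Collect the per-seed learning curves: validation error, training error, and runtime.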
    ve = np.empty((n_hps, n_seeds, n_fidelities), dtype='float32')
    te = np.empty((n_hps, n_seeds, n_fidelities), dtype='float32')
    rt = np.empty((n_hps, n_seeds, n_fidelities), dtype='float32')
    for ai in range(n_hps):
        for si, seed in enumerate([777, 888, 999]):
            try:
                entry = data['arch2infos'][ai]['200']['all_results'][(dataset, seed)]
                validation_error = [
                    1 - entry['eval_acc1es']['ori-test@%d' % ei] / 100 for ei in range(n_fidelities)
                ]
                train_error = [1 - entry['train_acc1es'][ei] / 100 for ei in range(n_fidelities)]
                # runtime measures the time for a single epoch (training plus evaluation)
                runtime = [
                    entry['train_times'][ei] + entry['eval_times']['ori-test@%d' % ei]
                    for ei in range(n_fidelities)
                ]
            except KeyError:
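                # Not every architecture was evaluated for every seed; mark the missing
                # learning curves as NaN and impute them below.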
                validation_error = [np.nan] * n_fidelities
                train_error = [np.nan] * n_fidelities
                runtime = [np.nan] * n_fidelities
            ve[ai, si, :] = validation_error
            te[ai, si, :] = train_error
            rt[ai, si, :] = runtime
    def impute(values):
        idx = np.isnan(values)
        a, s, e = np.where(idx)
        for ai, si, ei in zip(a, s, e):
            seed_values = values[ai, :, ei]
            m = np.mean(np.delete(seed_values, si))
            values[ai, si, ei] = m
        return values
    # The original data contains missing values, since not all architectures were evaluated
    # for all three seeds. We impute these missing values by taking the average of the
    # available data points for the corresponding architecture and time step.
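    # For example, if the validation errors of an architecture at some epoch are
    # [0.30, NaN, 0.26] across the three seeds, the missing value becomes
    # (0.30 + 0.26) / 2 = 0.28.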
    save_objective_values_helper('valid_error', impute(ve))
    save_objective_values_helper('train_error', impute(te))
    save_objective_values_helper('runtime', impute(rt))
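
    # Latency, FLOPs, and parameter count are constant per architecture (taken from the
    # seed-777 entry); broadcast them over the seed and fidelity axes so that every
    # objective has the same shape.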
    latency = np.array(
        [data['arch2infos'][ai]['200']['all_results'][(dataset, 777)]['latency'][0] for ai in range(n_hps)])
    latency = np.repeat(np.expand_dims(latency, axis=-1), n_seeds, axis=-1)
    latency = np.repeat(np.expand_dims(latency, axis=-1), n_fidelities, axis=-1)
    save_objective_values_helper('latency', latency)

    flops = np.array([data['arch2infos'][ai]['200']['all_results'][(dataset, 777)]['flop'] for ai in range(n_hps)])
    flops = np.repeat(np.expand_dims(flops, axis=-1), n_seeds, axis=-1)
    flops = np.repeat(np.expand_dims(flops, axis=-1), n_fidelities, axis=-1)
    save_objective_values_helper('flops', flops)

    params = np.array([data['arch2infos'][ai]['200']['all_results'][(dataset, 777)]['params'] for ai in range(n_hps)])
    params = np.repeat(np.expand_dims(params, axis=-1), n_seeds, axis=-1)
    params = np.repeat(np.expand_dims(params, axis=-1), n_fidelities, axis=-1)
    save_objective_values_helper('params', params)
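
    # Each hyperparameter (cell edge) selects one of the five NAS-Bench-201 operations.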
    configuration_space = {
        node: search_space.choice(['avg_pool_3x3', 'nor_conv_3x3', 'skip_connect', 'nor_conv_1x1', 'none'])
        for node in hp_cols
    }
    fidelity_space = {
        RESOURCE_ATTR: search_space.randint(lower=1, upper=201)
    }
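
    # Prefix the objective names with "metric_" so they match the metric constants used by
    # the blackbox repository (checked by the assertions below).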
objective_names = [f"metric_{m}" for m in objective_names]
# Sanity checks:
assert objective_names[0] == METRIC_VALID_ERROR
assert objective_names[2] == METRIC_TIME_THIS_RESOURCE
return BlackboxTabular(
hyperparameters=hyperparameters,
configuration_space=configuration_space,
fidelity_space=fidelity_space,
objectives_evaluations=objective_evaluations,
fidelity_values=fidelity_values,
objectives_names=objective_names,
)