in syne_tune/optimizer/schedulers/searchers/bayesopt/gpautograd/learncurve/issm.py [0:0]
def _prepare_data_internal(
        state: TuningJobState, data_lst: List[Tuple[Configuration, List, str]],
        configspace_ext: ExtendedConfiguration, active_metric: str,
        do_fantasizing: bool, mean: float,
        std: float) -> (List[Configuration], List[np.ndarray], List[str]):
    """
    Build one target matrix per trial from observed (and, optionally,
    fantasized) metric values.

    Each entry of ``data_lst`` is a tuple ``(config, observed, trial_id)``,
    where ``observed`` is a list of ``(resource, value)`` pairs. Observed
    values are normalized as ``(value - mean) / std`` and arranged as one row
    per resource level. If ``do_fantasizing`` is True, fantasy samples of
    pending evaluations in ``state.pending_evaluations`` are stacked below the
    observed rows (they are not normalized here), and observed rows are
    repeated across columns so that all rows have ``num_fantasy_samples``
    columns. Trials which have pending evaluations only are appended at the
    end, their configs being taken from ``state.config_for_trial``.

    :param state: Provides ``pending_evaluations`` and ``config_for_trial``;
        only accessed if ``do_fantasizing`` is True
    :param data_lst: List of ``(config, observed, trial_id)`` tuples
    :param configspace_ext: Provides ``resource_attr_range``, whose first
        entry is the smallest resource level ``r_min``
    :param active_metric: Key into ``ev.fantasies`` of pending evaluations
    :param do_fantasizing: Include fantasized values of pending evaluations?
    :param mean: Offset subtracted from observed values
    :param std: Scale by which observed values are divided
    :return: ``(configs, targets, trial_ids)``
    :raises AssertionError: If observed or pending resource levels of a trial
        are not contiguous (observations start at ``r_min``, pending ones
        directly follow the observations), if a pending evaluation is not a
        ``FantasizedPendingEvaluation``, or if pending evaluations differ in
        their number of fantasy samples
    """
    r_min, _ = configspace_ext.resource_attr_range
    configs = [x[0] for x in data_lst]
    trial_ids = [x[2] for x in data_lst]
    targets = []

    # Group fantasized values of pending evaluations by trial_id
    fantasized = dict()
    num_fantasy_samples = None
    if do_fantasizing:
        for ev in state.pending_evaluations:
            assert isinstance(ev, FantasizedPendingEvaluation)
            entry = (ev.resource, ev.fantasies[active_metric])
            sz = entry[1].size
            if num_fantasy_samples is None:
                num_fantasy_samples = sz
            else:
                assert sz == num_fantasy_samples, \
                    "Number of fantasy samples must be the same for all " +\
                    f"pending evaluations ({sz}, {num_fantasy_samples})"
            fantasized.setdefault(ev.trial_id, []).append(entry)

    def fantasy_rows(trial_id, entries, start):
        # Sorts entries by resource, checks that resources are
        # start, start + 1, ..., and returns one row vector per entry
        entries = sorted(entries, key=itemgetter(0))
        fanta_res = [x[0] for x in entries]
        test = list(range(start, start + len(entries)))
        assert fanta_res == test, \
            f"trial_id {trial_id} has pending evaluations at {fanta_res}" +\
            f", but we need them at {test}"
        return [x[1].reshape((1, -1)) for x in entries]

    trial_ids_done = set()
    for _, observed, trial_id in data_lst:
        num_obs = len(observed)
        if num_obs == 0:
            # NOTE(review): such a trial keeps its entry in ``configs`` and
            # ``trial_ids``, but gets no entry in ``targets`` here. Confirm
            # that callers never pass trials without observations
            continue
        # Observations must be from r_min without any missing
        obs_res = [x[0] for x in observed]
        test = list(range(r_min, r_min + num_obs))
        assert obs_res == test, \
            f"trial_id {trial_id} has observations at {obs_res}, but " +\
            f"we need them at {test}"
        # Note: Only observed targets are normalized, not fantasized ones
        this_targets = (
            np.array([x[1] for x in observed]).reshape((-1, 1)) - mean) / std
        if do_fantasizing:
            # ``num_fantasy_samples`` remains None if there are no pending
            # evaluations at all. Comparing None with an int raises
            # TypeError, so this case has to be excluded explicitly
            if num_fantasy_samples is not None and num_fantasy_samples > 1:
                this_targets = this_targets * np.ones((1, num_fantasy_samples))
            if trial_id in fantasized:
                # Pending resources must directly follow the observed ones
                this_targets = np.vstack(
                    [this_targets] +
                    fantasy_rows(
                        trial_id, fantasized[trial_id], r_min + num_obs))
                trial_ids_done.add(trial_id)
        targets.append(this_targets)

    if do_fantasizing:
        # There may be trials with pending evals, but no observed ones
        for trial_id, entries in fantasized.items():
            if trial_id in trial_ids_done:
                continue
            configs.append(state.config_for_trial[trial_id])
            trial_ids.append(trial_id)
            targets.append(np.vstack(fantasy_rows(trial_id, entries, r_min)))

    return configs, targets, trial_ids