in syne_tune/optimizer/schedulers/searchers/bayesopt/gpautograd/learncurve/issm.py [0:0]
def prepare_data_with_pending(
state: TuningJobState, configspace_ext: ExtendedConfiguration,
active_metric: str, normalize_targets: bool = False) -> Tuple[Dict, Dict]:
"""
Similar to `prepare_data` with `do_fantasizing=False`, but two dicts are
returned: the first for trials without pending evaluations, the second
for trials with pending evaluations. The latter dict also contains trials
which have pending, but no observed evaluations.
The second dict has the additional entry `num_pending`, which lists the
number of pending evaluations for each trial. These evaluations must be
contiguous and directly adjacent to the observed ones, so that the union
of observed and pending resource levels is contiguous for each trial.

:param state: See `prepare_data`
:param configspace_ext: See `prepare_data`
:param active_metric: See `prepare_data`
:param normalize_targets: See `prepare_data`
:return: Two dicts as described above (first without, second with pending evaluations)
"""
r_min, r_max = configspace_ext.resource_attr_range
hp_ranges = configspace_ext.hp_ranges
data1_lst = [] # trials without pending evals
data2_lst = [] # trials with pending evals
num_pending = []
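# Number of pending evaluations per trial; trials without pending
# evaluations do not appear in this counter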
num_pending_for_trial = Counter(
ev.trial_id for ev in state.pending_evaluations)
targets = []
done_trial_ids = set()
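# Split trials with observed data into those without pending evaluations
# (`data1_lst`) and those with pending evaluations (`data2_lst`); also
# collect all observed target values for the normalization statistics below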
for ev in state.trials_evaluations:
tpl = _create_tuple(ev, active_metric, state.config_for_trial)
_, observed, trial_id = tpl
if trial_id not in num_pending_for_trial:
data1_lst.append(tpl)
else:
data2_lst.append(tpl)
num_pending.append(num_pending_for_trial[trial_id])
done_trial_ids.add(trial_id)
targets += [x[1] for x in observed]
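# Statistics for optional normalization of targets to zero mean and unit
# variance (the std is bounded away from 0 for numerical stability)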
mean = 0.0
std = 1.0
if normalize_targets:
std = max(np.std(targets), 1e-9)
mean = np.mean(targets)
# There may be trials with pending evaluations, but no observed ones
for ev in state.pending_evaluations:
trial_id = ev.trial_id
if trial_id not in done_trial_ids:
config = state.config_for_trial[trial_id]
data2_lst.append((config, [], trial_id))
num_pending.append(num_pending_for_trial[trial_id])
results = ()
with_pending = False
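# First pass: trials without pending evaluations; second pass: trials with
# pending evaluations (this dict receives the extra `num_pending` entry)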
for data_lst in (data1_lst, data2_lst):
configs, targets, trial_ids = _prepare_data_internal(
state=state,
data_lst=data_lst,
configspace_ext=configspace_ext,
active_metric=active_metric,
do_fantasizing=False,
mean=mean, std=std)
if configs:
# Sort in decreasing order w.r.t. number of targets
if not with_pending:
configs, targets, trial_ids = zip(*sorted(
zip(configs, targets, trial_ids),
key=lambda x: -x[1].shape[0]))
else:
configs, targets, num_pending, trial_ids = zip(*sorted(
zip(configs, targets, num_pending, trial_ids),
key=lambda x: -x[1].shape[0]))
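# Encode configurations into a dense feature matrix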
features = hp_ranges.to_ndarray_matrix(configs)
else:
# It is possible that `data1_lst` (or `data2_lst`) is empty
features = None
result = {
'configs': list(configs),
'features': features,
'targets': list(targets),
'trial_ids': list(trial_ids),
'r_min': r_min,
'r_max': r_max,
'do_fantasizing': False}
if with_pending:
result['num_pending'] = num_pending
if normalize_targets:
result['mean_targets'] = mean
result['std_targets'] = std
results = results + (result,)
with_pending = True
return results
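

# Usage sketch (illustrative only, not part of the original module): it shows
# how the two dicts returned by `prepare_data_with_pending` are typically
# consumed. `state`, `configspace_ext` and `active_metric` are assumed to be
# supplied by the caller; the function and variable names are placeholders.
def _example_prepare_data_with_pending(
        state: TuningJobState, configspace_ext: ExtendedConfiguration,
        active_metric: str):
    data_done, data_pending = prepare_data_with_pending(
        state, configspace_ext, active_metric, normalize_targets=True)
    # `features` is None if the corresponding dict contains no trials
    if data_done['features'] is not None:
        print(f"{len(data_done['trial_ids'])} trials without pending evaluations")
    # For trials with pending evaluations, `num_pending[i]` is the number of
    # pending resource levels of trial `trial_ids[i]`
    for trial_id, n_pend in zip(
            data_pending['trial_ids'], data_pending['num_pending']):
        print(f"trial {trial_id}: {n_pend} pending evaluation(s)")
    return data_done, data_pending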