# _prepare_data_internal()
#
# From syne_tune/optimizer/schedulers/searchers/bayesopt/gpautograd/learncurve/issm.py


def _prepare_data_internal(
        state: TuningJobState, data_lst: List[Tuple[Configuration, List, str]],
        configspace_ext: ExtendedConfiguration, active_metric: str,
        do_fantasizing: bool, mean: float,
        std: float) -> Tuple[List[Configuration], List[np.ndarray], List[str]]:
    """
    Converts per-trial observation data into per-trial target matrices.

    Each entry of `data_lst` is a tuple `(config, observed, trial_id)`, where
    `observed` is a list of `(resource, metric_value)` pairs. Observations of
    a trial must cover the contiguous range `r_min, r_min + 1, ...` without
    gaps. Observed metric values are normalized as `(y - mean) / std`.

    If `do_fantasizing` is set, fantasy samples for pending evaluations (from
    `state.pending_evaluations`, entries must be
    :class:`FantasizedPendingEvaluation`) are stacked below the observed
    targets of their trial; their resources must continue the observed range
    contiguously. All pending evaluations must carry the same number of
    fantasy samples, which becomes the number of columns of every target
    matrix. Fantasized values are not normalized. Trials that have pending
    evaluations but no observations are appended at the end of the output.

    :param state: Tuning job state (pending evaluations, trial configs)
    :param data_lst: List of `(config, observed, trial_id)` tuples
    :param configspace_ext: Extended config space (provides resource range)
    :param active_metric: Metric whose fantasy samples are used
    :param do_fantasizing: Include fantasized pending evaluations?
    :param mean: Mean used to normalize observed target values
    :param std: Stddev used to normalize observed target values
    :return: `(configs, targets, trial_ids)`, lists aligned by position; each
        entry of `targets` has shape `(num_resources, num_samples)`
    """
    r_min, r_max = configspace_ext.resource_attr_range
    configs = [x[0] for x in data_lst]
    trial_ids = [x[2] for x in data_lst]
    targets = []

    # Group fantasy samples by trial_id, checking that every pending
    # evaluation carries the same number of fantasy samples
    fantasized = dict()
    num_fantasy_samples = None
    if do_fantasizing:
        for ev in state.pending_evaluations:
            assert isinstance(ev, FantasizedPendingEvaluation)
            trial_id = ev.trial_id
            entry = (ev.resource, ev.fantasies[active_metric])
            sz = entry[1].size
            if num_fantasy_samples is None:
                num_fantasy_samples = sz
            else:
                assert sz == num_fantasy_samples, \
                    "Number of fantasy samples must be the same for all " +\
                    f"pending evaluations ({sz}, {num_fantasy_samples})"
            fantasized.setdefault(trial_id, []).append(entry)
    if num_fantasy_samples is None:
        # No pending evaluations (or fantasizing disabled): target matrices
        # have a single column. Also guards the `> 1` comparison below, which
        # would otherwise raise on `None`
        num_fantasy_samples = 1

    trial_ids_done = set()
    for config, observed, trial_id in data_lst:
        # Observations must be from r_min without any missing
        obs_res = [x[0] for x in observed]
        num_obs = len(observed)
        if num_obs > 0:
            test = list(range(r_min, r_min + num_obs))
            assert obs_res == test, \
                f"trial_id {trial_id} has observations at {obs_res}, but " +\
                f"we need them at {test}"
        # Note: Only observed targets are normalized, not fantasized ones
        this_targets = (
            np.array([x[1] for x in observed]).reshape((-1, 1)) - mean) / std
        if do_fantasizing:
            if num_fantasy_samples > 1:
                # Broadcast observed column to one column per fantasy sample
                this_targets = this_targets * np.ones((1, num_fantasy_samples))
            if trial_id in fantasized:
                # Fantasized targets must continue the observed resource
                # range contiguously
                this_fantasized = sorted(fantasized[trial_id], key=itemgetter(0))
                fanta_res = [x[0] for x in this_fantasized]
                start = r_min + num_obs
                test = list(range(start, start + len(this_fantasized)))
                assert fanta_res == test, \
                    f"trial_id {trial_id} has pending evaluations at {fanta_res}" +\
                    f", but we need them at {test}"
                this_targets = np.vstack(
                    [this_targets] +
                    [x[1].reshape((1, -1)) for x in this_fantasized])
                trial_ids_done.add(trial_id)
        targets.append(this_targets)

    if do_fantasizing:
        # There may be trials with pending evals, but no observed ones
        for trial_id, this_fantasized in fantasized.items():
            if trial_id not in trial_ids_done:
                configs.append(state.config_for_trial[trial_id])
                trial_ids.append(trial_id)
                this_fantasized = sorted(this_fantasized, key=itemgetter(0))
                fanta_res = [x[0] for x in this_fantasized]
                test = list(range(r_min, r_min + len(this_fantasized)))
                assert fanta_res == test, \
                    f"trial_id {trial_id} has pending evaluations at {fanta_res}" + \
                    f", but we need them at {test}"
                this_targets = np.vstack(
                    [x[1].reshape((1, -1)) for x in this_fantasized])
                targets.append(this_targets)

    return configs, targets, trial_ids