in bayesmark/experiment_aggregate.py [0:0]
def concat_experiments(all_experiments, ravel=False):
    """Aggregate the Datasets from a series of experiments into combined Datasets.

    Parameters
    ----------
    all_experiments : typing.Iterable
        Iterable (possibly from a generator) with the Datasets from each experiment. Each item in `all_experiments`
        is a pair containing ``(meta_data, data)``, where ``meta_data`` unpacks as ``(test_case, optimizer, uuid)``
        and ``data`` unpacks as ``(perf_ds, time_ds, suggest_ds, sig)``. See `load_experiments` for details on these
        variables.
    ravel : bool
        Deprecated. Any true value raises `NotImplementedError`; reshape in the analysis steps instead.

    Returns
    -------
    all_perf : :class:`xarray:xarray.Dataset`
        Dataset containing all of the `perf_ds` from the experiments. The meta-data from the experiments are included
        as extra dimensions. `all_perf` has dimensions ``(ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)``. To convert the
        `uuid` to a trial, there must be an equal number of repetition in the experiments for each `TEST_CASE`,
        `METHOD` combination. Likewise, all of the experiments need an equal number of `ITER` and `SUGGEST`.
    all_time : :class:`xarray:xarray.Dataset`
        Dataset combining ``summarize_time(time_ds)`` from each of the experiments. The new dimensions are
        ``(ITER, TEST_CASE, METHOD, TRIAL)``.
    all_suggest : dict(str, :class:`xarray:xarray.Dataset`)
        Dictionary mapping each test case to a Dataset containing all of the `suggest_ds` from the experiments on
        that test case. Each Dataset has dimensions ``(ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)`` where the
        `TEST_CASE` dimension is singleton.
    all_sigs : dict(str, list(list(float)))
        Aggregate of all experiment signatures, grouped by test case.

    Raises
    ------
    NotImplementedError
        If `ravel` is true.
    ValueError
        If `all_experiments` yields no experiments.
    AssertionError
        If the experiments are inconsistent: uneven number of trials, unexpected dimensions, or missing (NaN)
        entries after concatenation.
    """
    # The ravel option does not depend on the data, so reject it before consuming anything from the iterable.
    if ravel:
        raise NotImplementedError("ravel is deprecated. Just reshape in analysis steps instead.")

    all_perf = {}
    all_time = {}
    all_suggest = {}
    all_sigs = {}
    trial_counter = Counter()
    for (test_case, optimizer, _uuid), (perf_ds, time_ds, suggest_ds, sig) in all_experiments:
        # Trials are numbered by order of appearance within each (test case, optimizer) combination; the uuid
        # itself is not used for the key.
        combo = (test_case, optimizer)
        case_key = (test_case, optimizer, trial_counter[combo])
        trial_counter[combo] += 1

        # Process perf data
        assert all(perf_ds[kk].dims == (ITER, SUGGEST) for kk in perf_ds)
        all_perf[case_key] = perf_ds

        # Process time data
        all_time[case_key] = summarize_time(time_ds)

        # Process suggestion data, these are kept separate per test case
        all_suggest.setdefault(test_case, {})[case_key] = suggest_ds

        # Handle the signatures
        all_sigs.setdefault(test_case, []).append(sig)

    # Without this guard the min/max below fail with an unhelpful "empty sequence" ValueError.
    if not trial_counter:
        raise ValueError("No experiments to aggregate")
    assert min(trial_counter.values()) == max(trial_counter.values()), "Uneven number of trials per test case"

    # Now need to concat dict of datasets into single dataset
    all_perf = xru.ds_concat(all_perf, dims=(TEST_CASE, METHOD, TRIAL))
    assert all(all_perf[kk].dims == (ITER, SUGGEST, TEST_CASE, METHOD, TRIAL) for kk in all_perf)
    assert not any(
        np.any(np.isnan(all_perf[kk].values)) for kk in all_perf
    ), "Missing combinations of method and test case"

    all_time = xru.ds_concat(all_time, dims=(TEST_CASE, METHOD, TRIAL))
    assert all(all_time[kk].dims == (ITER, TEST_CASE, METHOD, TRIAL) for kk in all_time)
    assert not any(np.any(np.isnan(all_time[kk].values)) for kk in all_time)
    assert xru.coord_compat((all_perf, all_time), (ITER, TEST_CASE, METHOD, TRIAL))

    # Only values of existing keys are replaced here, so iterating over the dict while assigning is safe.
    for test_case in all_suggest:
        suggest_cat = xru.ds_concat(all_suggest[test_case], dims=(TEST_CASE, METHOD, TRIAL))
        all_suggest[test_case] = suggest_cat
        assert all(suggest_cat[kk].dims == (ITER, SUGGEST, TEST_CASE, METHOD, TRIAL) for kk in suggest_cat)
        assert not any(np.any(np.isnan(suggest_cat[kk].values)) for kk in suggest_cat)
        assert xru.coord_compat((all_perf, suggest_cat), (ITER, METHOD, TRIAL))
        assert suggest_cat.coords[TEST_CASE].shape == (1,), "test case should be singleton"

    return all_perf, all_time, all_suggest, all_sigs