in bayesmark/experiment_aggregate.py [0:0]
def load_experiments(uuid_list, db_root, dbid):  # pragma: io
    """Generator to load the results of the experiments.

    Parameters
    ----------
    uuid_list : list(uuid.UUID)
        List of UUIDs corresponding to experiments to load.
    db_root : str
        Root location for data store as requested by the serializer used.
    dbid : str
        Name of the data store as requested by the serializer used.

    Yields
    ------
    meta_data : (str, str, str)
        The `meta_data` contains a `tuple` of `str` with ``test_case, optimizer, uuid``.
    data : (:class:`xarray:xarray.Dataset`, :class:`xarray:xarray.Dataset`, :class:`xarray:xarray.Dataset`, list(float))
        The `data` contains a tuple of ``(perf_ds, time_ds, suggest_ds, sig)``. The `perf_ds` is a
        :class:`xarray:xarray.Dataset` containing the evaluation results with dimensions ``(ITER, SUGGEST)``, each
        variable is an objective. The `time_ds` is an :class:`xarray:xarray.Dataset` containing the timing results of
        the form accepted by `summarize_time`. The coordinates must be compatible with `perf_ds`. The `suggest_ds` is a
        :class:`xarray:xarray.Dataset` containing the inputs to the function evaluations. Each variable is a function
        input. Finally, `sig` contains the `test_case` signature and must be `list(float)`.
    """
    # Track which experiment UUIDs have already been yielded so we can detect
    # UUID reuse across studies, which would corrupt downstream aggregation.
    uuids_seen = set()
    for uuid_ in uuid_list:
        logger.info(uuid_.hex)

        # Load perf, timing, and suggestion data for this experiment. All three
        # files were written by the same run, so their meta-data must agree.
        perf_ds, meta = XRSerializer.load(db_root, db=dbid, key=cc.EVAL, uuid_=uuid_)
        time_ds, meta_t = XRSerializer.load(db_root, db=dbid, key=cc.TIME, uuid_=uuid_)
        assert meta == meta_t, "meta data should match between time and eval files"
        suggest_ds, meta_t = XRSerializer.load(db_root, db=dbid, key=cc.SUGGEST_LOG, uuid_=uuid_)
        assert meta == meta_t, "meta data should match between suggest and eval files"

        # Get signature to pass out as well; only the second element of the
        # stored ("signature") pair is the list(float) signature itself.
        _, sig = meta["signature"]
        logger.info(meta)
        logger.info(sig)

        # Build the new indices for combined data, this could be put in function for easier testing
        eval_args = unserializable_dict(meta["args"])  # Unpack meta-data
        test_case = SklearnModel.test_case_str(
            eval_args[CmdArgs.classifier], eval_args[CmdArgs.data], eval_args[CmdArgs.metric]
        )
        optimizer = str_join_safe(
            ARG_DELIM, (eval_args[CmdArgs.optimizer], eval_args[CmdArgs.opt_rev], eval_args[CmdArgs.rev])
        )
        args_uuid = eval_args[CmdArgs.uuid]

        # Check UUID sanity: the hex stored in the meta-data must match the
        # filename-derived UUID, and must not have been seen in a prior study.
        assert isinstance(args_uuid, str)
        assert args_uuid == uuid_.hex, "UUID meta-data does not match filename"
        assert args_uuid not in uuids_seen, "uuids being reused between studies"
        uuids_seen.add(args_uuid)

        # Return key -> data so this generator can be iterated over in dict like manner
        meta_data = (test_case, optimizer, args_uuid)
        data = (perf_ds, time_ds, suggest_ds, sig)
        yield meta_data, data