in bayesmark/experiment_aggregate.py [0:0]
def load_experiments(uuid_list, db_root, dbid):  # pragma: io
    """Generator to load the results of the experiments.

    Parameters
    ----------
    uuid_list : list(uuid.UUID)
        List of UUIDs corresponding to experiments to load.
    db_root : str
        Root location for data store as requested by the serializer used.
    dbid : str
        Name of the data store as requested by the serializer used.

    Yields
    ------
    meta_data : (str, str, str)
        The `meta_data` contains a `tuple` of `str` with ``test_case, optimizer, uuid``.
    data : (:class:`xarray:xarray.Dataset`, :class:`xarray:xarray.Dataset`, :class:`xarray:xarray.Dataset`, list(float))
        The `data` contains a tuple of ``(perf_ds, time_ds, suggest_ds, sig)``. The `perf_ds` is a
        :class:`xarray:xarray.Dataset` containing the evaluation results with dimensions ``(ITER, SUGGEST)``, each
        variable is an objective. The `time_ds` is an :class:`xarray:xarray.Dataset` containing the timing results of
        the form accepted by `summarize_time`. The coordinates must be compatible with `perf_ds`. The `suggest_ds` is a
        :class:`xarray:xarray.Dataset` containing the inputs to the function evaluations. Each variable is a function
        input. Finally, `sig` contains the `test_case` signature and must be `list(float)`.
    """
    # Track which experiment UUIDs have already been yielded so we can detect
    # UUID reuse across studies, which would corrupt downstream aggregation.
    uuids_seen = set()
    for uuid_ in uuid_list:
        logger.info(uuid_.hex)

        # Load perf, timing, and suggestion data for this experiment. All three
        # files were written by the same run, so their meta-data must agree.
        perf_ds, meta = XRSerializer.load(db_root, db=dbid, key=cc.EVAL, uuid_=uuid_)
        time_ds, meta_t = XRSerializer.load(db_root, db=dbid, key=cc.TIME, uuid_=uuid_)
        assert meta == meta_t, "meta data should match between time and eval files"
        suggest_ds, meta_t = XRSerializer.load(db_root, db=dbid, key=cc.SUGGEST_LOG, uuid_=uuid_)
        assert meta == meta_t, "meta data should match between suggest and eval files"

        # Get signature to pass out as well; only the second element of the
        # stored ("signature") pair is the list(float) signature itself.
        _, sig = meta["signature"]
        logger.info(meta)
        logger.info(sig)

        # Build the new indices for combined data, this could be put in function for easier testing
        eval_args = unserializable_dict(meta["args"])  # Unpack meta-data
        test_case = SklearnModel.test_case_str(
            eval_args[CmdArgs.classifier], eval_args[CmdArgs.data], eval_args[CmdArgs.metric]
        )
        optimizer = str_join_safe(
            ARG_DELIM, (eval_args[CmdArgs.optimizer], eval_args[CmdArgs.opt_rev], eval_args[CmdArgs.rev])
        )
        args_uuid = eval_args[CmdArgs.uuid]

        # Check UUID sanity: the hex stored in the meta-data must match the
        # filename-derived UUID, and must not have been seen in a prior study.
        assert isinstance(args_uuid, str)
        assert args_uuid == uuid_.hex, "UUID meta-data does not match filename"
        assert args_uuid not in uuids_seen, "uuids being reused between studies"
        uuids_seen.add(args_uuid)

        # Return key -> data so this generator can be iterated over in dict like manner
        meta_data = (test_case, optimizer, args_uuid)
        data = (perf_ds, time_ds, suggest_ds, sig)
        yield meta_data, data