def gen_commands()

in bayesmark/experiment_launcher.py [0:0]
46 lines of code
22 McCabe index (conditional complexity)

def gen_commands(args, opt_file_lookup, run_uuid):
    """Generator providing commands to launch processes for experiments.

    Parameters
    ----------
    args : dict(CmdArgs, [int, str])
        Arguments of options to pass to the experiments being launched. The keys corresponds to the same arguments
        passed to this program.
    opt_file_lookup : dict(str, str)
        Mapping from method name to filename containing wrapper class for the method.
    run_uuid : uuid.UUID
        UUID for this launcher run. Needed to generate different experiments UUIDs on each call. This function is
        deterministic provided the same `run_uuid`.

    Yields
    ------
    iteration_key : (str, str, str, str)
        Tuple containing ``(trial, classifier, data, optimizer)`` to index the experiment.
    full_cmd : tuple(str)
        Strings containing command and arguments to run a process with experiment. Join with whitespace or use
        :func:`.util.shell_join` to get string with executable command. The command omits ``--opt-root`` which means it
        will default to ``.`` if the command is executed. As such, the command assumes it is executed with
        ``--opt-root`` as the working directory.
    """
    args_to_pass_thru = [CmdArgs.n_calls, CmdArgs.n_suggest, CmdArgs.db_root, CmdArgs.db]
    # This could be made simpler and avoid if statement if we just always pass dataroot, even if no custom data used.
    if args[CmdArgs.data_root] is not None:
        args_to_pass_thru.append(CmdArgs.data_root)

    # Possibilities to iterate over. Put them in sorted order just for good measure.
    c_list = strict_sorted(MODEL_NAMES if args[CmdArgs.classifier] is None else args[CmdArgs.classifier])
    d_list = strict_sorted(DATA_LOADER_NAMES if args[CmdArgs.data] is None else args[CmdArgs.data])
    o_list = strict_sorted(
        list(opt_file_lookup.keys()) + list(CONFIG.keys())
        if args[CmdArgs.optimizer] is None
        else args[CmdArgs.optimizer]
    )
    assert all(
        ((optimizer in opt_file_lookup) or (optimizer in CONFIG)) for optimizer in o_list
    ), "unknown optimizer in optimizer list"

    m_set = set(METRICS if args[CmdArgs.metric] is None else args[CmdArgs.metric])
    m_lookup = {problem_type: sorted(m_set.intersection(mm)) for problem_type, mm in METRICS_LOOKUP.items()}
    assert all(
        (len(m_lookup[get_problem_type(data)]) > 0) for data in d_list
    ), "At one metric needed for each problem type of data sets"

    G = product(range_str(args[CmdArgs.n_repeat]), c_list, d_list, o_list)  # iterate all combos
    for rep, classifier, data, optimizer in G:
        _, rep_str = rep
        problem_type = get_problem_type(data)
        for metric in m_lookup[problem_type]:
            # Get a reproducible string based (conditioned on having same (run uuid), but should also never give
            # a duplicate (unless we force the same run uuid twice).
            iteration_key = (rep_str, classifier, data, optimizer, metric)
            iteration_id = str_join_safe(ARG_DELIM, iteration_key)
            sub_uuid = pyuuid.uuid5(run_uuid, iteration_id).hex

            # Build the argument list for subproc, passing some args thru
            cmd_args_pass_thru = [[CMD_STR[vv][0], arg_safe_str(args[vv])] for vv in args_to_pass_thru]
            # Technically, the optimizer is is not actually needed here for non-built in optimizers because it already
            # specified via the entry point: optimizer_wrapper_file
            cmd_args = [
                [CMD_STR[CmdArgs.classifier][0], arg_safe_str(classifier)],
                [CMD_STR[CmdArgs.data][0], arg_safe_str(data)],
                [CMD_STR[CmdArgs.optimizer][0], arg_safe_str(optimizer)],
                [CMD_STR[CmdArgs.uuid][0], arg_safe_str(sub_uuid)],
                [CMD_STR[CmdArgs.metric][0], arg_safe_str(metric)],
            ]
            cmd_args = tuple(sum(cmd_args + cmd_args_pass_thru, []))
            logger.info(" ".join(cmd_args))

            # The experiment command without the arguments
            if optimizer in CONFIG:  # => built in optimizer wrapper
                experiment_cmd = (EXPERIMENT_ENTRY,)
            else:
                optimizer_wrapper_file = opt_file_lookup[optimizer]
                assert optimizer_wrapper_file.endswith(".py"), "optimizer wrapper should a be .py file"
                experiment_cmd = (PY_INTERPRETER, optimizer_wrapper_file)

            # Check arg safe again, off elements in list need to be argsafe
            assert all((_is_arg_safe(ss) == (ii % 2 == 1)) for ii, ss in enumerate(cmd_args))

            full_cmd = experiment_cmd + cmd_args
            yield iteration_key, full_cmd