in bayesmark/experiment_launcher.py [0:0]
def gen_commands(args, opt_file_lookup, run_uuid):
    """Generator providing commands to launch processes for experiments.

    One command is generated for every combination of repetition, classifier, data set, and optimizer, and for every
    selected metric that applies to the problem type of the data set.

    Parameters
    ----------
    args : dict(CmdArgs, [int, str])
        Arguments of options to pass to the experiments being launched. The keys correspond to the same arguments
        passed to this program.
    opt_file_lookup : dict(str, str)
        Mapping from method name to filename containing wrapper class for the method.
    run_uuid : uuid.UUID
        UUID for this launcher run. Needed to generate different experiments UUIDs on each call. This function is
        deterministic provided the same `run_uuid`.

    Yields
    ------
    iteration_key : (str, str, str, str, str)
        Tuple containing ``(trial, classifier, data, optimizer, metric)`` to index the experiment.
    full_cmd : tuple(str)
        Strings containing command and arguments to run a process with experiment. Join with whitespace or use
        :func:`.util.shell_join` to get string with executable command. The command omits ``--opt-root`` which means it
        will default to ``.`` if the command is executed. As such, the command assumes it is executed with
        ``--opt-root`` as the working directory.

    Raises
    ------
    AssertionError
        If an optimizer is in neither `opt_file_lookup` nor ``CONFIG``, if a data set has no selected metric for its
        problem type, if a non-built-in optimizer wrapper is not a ``.py`` file, or if the generated argument list
        fails the arg-safety check.
    """
    args_to_pass_thru = [CmdArgs.n_calls, CmdArgs.n_suggest, CmdArgs.db_root, CmdArgs.db]
    # This could be made simpler and avoid if statement if we just always pass dataroot, even if no custom data used.
    if args[CmdArgs.data_root] is not None:
        args_to_pass_thru.append(CmdArgs.data_root)

    # Possibilities to iterate over. Put them in sorted order just for good measure.
    c_list = strict_sorted(MODEL_NAMES if args[CmdArgs.classifier] is None else args[CmdArgs.classifier])
    d_list = strict_sorted(DATA_LOADER_NAMES if args[CmdArgs.data] is None else args[CmdArgs.data])
    o_list = strict_sorted(
        list(opt_file_lookup.keys()) + list(CONFIG.keys())
        if args[CmdArgs.optimizer] is None
        else args[CmdArgs.optimizer]
    )
    assert all(
        ((optimizer in opt_file_lookup) or (optimizer in CONFIG)) for optimizer in o_list
    ), "unknown optimizer in optimizer list"

    # For each problem type, the (sorted) subset of the requested metrics that applies to it.
    m_set = set(METRICS if args[CmdArgs.metric] is None else args[CmdArgs.metric])
    m_lookup = {problem_type: sorted(m_set.intersection(mm)) for problem_type, mm in METRICS_LOOKUP.items()}
    assert all(
        (len(m_lookup[get_problem_type(data)]) > 0) for data in d_list
    ), "At one metric needed for each problem type of data sets"

    # The pass-thru arguments do not depend on the experiment, so build the flat (flag, value, flag, value, ...)
    # sequence once here rather than once per generated command.
    pass_thru_tokens = tuple(
        token for vv in args_to_pass_thru for token in (CMD_STR[vv][0], arg_safe_str(args[vv]))
    )

    G = product(range_str(args[CmdArgs.n_repeat]), c_list, d_list, o_list)  # iterate all combos
    for rep, classifier, data, optimizer in G:
        _, rep_str = rep
        problem_type = get_problem_type(data)
        for metric in m_lookup[problem_type]:
            # Get a reproducible string (conditioned on having the same run uuid), but one that should also never
            # give a duplicate (unless we force the same run uuid twice).
            iteration_key = (rep_str, classifier, data, optimizer, metric)
            iteration_id = str_join_safe(ARG_DELIM, iteration_key)
            sub_uuid = pyuuid.uuid5(run_uuid, iteration_id).hex

            # Build the flat argument tuple for the subproc: experiment specific args first, then the pass-thru args.
            # Technically, the optimizer is not actually needed here for non-built in optimizers because it is
            # already specified via the entry point: optimizer_wrapper_file
            cmd_args = (
                CMD_STR[CmdArgs.classifier][0],
                arg_safe_str(classifier),
                CMD_STR[CmdArgs.data][0],
                arg_safe_str(data),
                CMD_STR[CmdArgs.optimizer][0],
                arg_safe_str(optimizer),
                CMD_STR[CmdArgs.uuid][0],
                arg_safe_str(sub_uuid),
                CMD_STR[CmdArgs.metric][0],
                arg_safe_str(metric),
            ) + pass_thru_tokens
            logger.info(" ".join(cmd_args))

            # The experiment command without the arguments
            if optimizer in CONFIG:  # => built in optimizer wrapper
                experiment_cmd = (EXPERIMENT_ENTRY,)
            else:
                optimizer_wrapper_file = opt_file_lookup[optimizer]
                assert optimizer_wrapper_file.endswith(".py"), "optimizer wrapper should a be .py file"
                experiment_cmd = (PY_INTERPRETER, optimizer_wrapper_file)

            # Check arg safe again: odd-indexed elements (the values) need to be arg safe, while even-indexed
            # elements (the option flags) must not be.
            assert all((_is_arg_safe(ss) == (ii % 2 == 1)) for ii, ss in enumerate(cmd_args))

            full_cmd = experiment_cmd + cmd_args
            yield iteration_key, full_cmd