in bayesmark/experiment_launcher.py [0:0]
def dry_run(args, opt_file_lookup, run_uuid, fp, random=np_random):
    """Write to buffer description of commands for running all experiments.

    This function is almost pure by writing to a buffer, but it could be switched to a generator.

    Parameters
    ----------
    args : dict(CmdArgs, [int, str])
        Arguments of options to pass to the experiments being launched. The keys correspond to the same arguments
        passed to this program.
    opt_file_lookup : dict(str, str)
        Mapping from method name to filename containing wrapper class for the method.
    run_uuid : uuid.UUID
        UUID for this launcher run. Needed to generate different experiment UUIDs on each call. This function is
        deterministic provided the same `run_uuid`.
    fp : writable buffer
        File handle to write out sequence of commands to execute (broken into jobs on each line) to execute all the
        experiments (possibly each job in parallel).
    random : RandomState
        Random stream to use for reproducibility.
    """
    assert args[CmdArgs.n_jobs] > 0, "Must have non-zero jobs for dry run"

    # Taking in file pointer since then we can test without actual file. Could also build generator that returns
    # lines to write.
    manual_setup_info = XRSerializer.init_db_manual(args[CmdArgs.db_root], db=args[CmdArgs.db], keys=EXP_VARS)
    warnings.warn(manual_setup_info, UserWarning)

    # Get the commands, grouped by optimizer so stratified splitting can balance them across jobs
    dry_run_commands = {}
    G = gen_commands(args, opt_file_lookup, run_uuid)
    for (_, _, _, optimizer, _), full_cmd in G:
        cmd_str = shell_join(full_cmd)
        dry_run_commands.setdefault(optimizer, []).append(cmd_str)

    # Make sure we never have any empty jobs, which is a waste
    n_commands = sum(len(v) for v in dry_run_commands.values())
    n_jobs = min(args[CmdArgs.n_jobs], n_commands)

    # Would prob also work with pyrandom, but only tested np random so far
    subcommands = strat_split(list(dry_run_commands.values()), n_jobs, random=random)
    # Make sure have same commands overall, delete once we trust strat_split.
    # Flatten with a comprehension rather than sum(..., []), which is quadratic in the number of commands.
    all_commands = [cc for cmds in dry_run_commands.values() for cc in cmds]
    assert sorted(np.concatenate(subcommands)) == sorted(all_commands)

    job_suffix = run_uuid.hex[:UUID_JOB_CHARS]
    # Include comments as reproducibility lines
    args_str = serializable_dict(args)
    fp.write("# running: %s\n" % str(args_str))
    fp.write("# cmd: %s\n" % cmd.cmd_str())
    for ii, ii_str in range_str(n_jobs):
        # strat_split should never produce an empty job given n_jobs <= n_commands
        assert len(subcommands[ii]) > 0
        fp.write("job_%s_%s %s\n" % (job_suffix, ii_str, " && ".join(subcommands[ii])))