def sweep_collect()

in tools/sweep_collect.py [0:0]


def sweep_collect():
    """Collects results of a sweep.

    The sweep directory is ``sweep_cfg.ROOT_DIR/sweep_cfg.NAME``. Every log
    file reported by ``logging.get_log_files`` for its ``logs`` sub-directory
    is loaded in parallel with ``load_data``, a per-job and overall progress
    summary is printed, the loaded data is written to ``sweep.json`` inside
    the sweep directory, and finally ``cp.delete_checkpoints`` is run (with
    ``keep=sweep_cfg.COLLECT.CHECKPOINTS_KEEP``) on each job's checkpoint
    directory.

    Raises:
        AssertionError: If the ``cfgs`` or ``logs`` directory of the sweep
            does not exist.
    """
    # Get cfg and log files
    sweep_dir = os.path.join(sweep_cfg.ROOT_DIR, sweep_cfg.NAME)
    print("Collecting jobs for {:s}... ".format(sweep_dir))
    cfgs_dir = os.path.join(sweep_dir, "cfgs")
    logs_dir = os.path.join(sweep_dir, "logs")
    assert os.path.exists(cfgs_dir), "Cfgs dir {} not found".format(cfgs_dir)
    assert os.path.exists(logs_dir), "Logs dir {} not found".format(logs_dir)
    cfg_files = [c for c in os.listdir(cfgs_dir) if c.endswith(".yaml")]
    # Only the first element of the helper's return value is used here
    log_files = logging.get_log_files(logs_dir)[0]
    # Own the worker pool in a with-block so its processes are always released,
    # including when loading, reporting, saving or cleanup raises
    with multiprocessing.Pool(sweep_cfg.NUM_PROC) as process_pool:
        # Load the data of every log file (one entry per log file, unfiltered)
        print("Collecting jobs...")
        sweep = process_pool.map(load_data, log_files)
        # Print basic stats for sweep status; a job without a "test_epoch"
        # entry is reported as 0/1 epochs
        key = "test_epoch"
        epoch_ind = [d[key]["epoch_ind"][-1] if key in d else 0 for d in sweep]
        epoch_max = [d[key]["epoch_max"][-1] if key in d else 1 for d in sweep]
        epoch = ["{}/{}".format(i, m) for i, m in zip(epoch_ind, epoch_max)]
        # Compute the column width once instead of once per job
        width = max(map(len, epoch), default=0)
        job_done = sum(i == m for i, m in zip(epoch_ind, epoch_max))
        for d, e, i, m in zip(sweep, epoch, epoch_ind, epoch_max):
            out_str = "  {} [{:3d}%] [{:}]" + (" [stderr]" if d["err"] else "")
            print(out_str.format(d["log_file"], int(i / m * 100), e.ljust(width)))
        jobs_start = "jobs_started={}/{}".format(len(sweep), len(cfg_files))
        jobs_done = "jobs_done={}/{}".format(job_done, len(cfg_files))
        ep_done = "epochs_done={}/{}".format(sum(epoch_ind), sum(epoch_max))
        print("Status: {}, {}, {}".format(jobs_start, jobs_done, ep_done))
        # Save the sweep data
        sweep_file = os.path.join(sweep_dir, "sweep.json")
        print("Writing sweep data to: {}".format(sweep_file))
        with open(sweep_file, "w") as f:
            json.dump(sweep, f, sort_keys=True)
        # Clean up checkpoints after saving sweep data, if needed; each job's
        # checkpoint dir is derived from the path of its stdout.log
        keep = sweep_cfg.COLLECT.CHECKPOINTS_KEEP
        cp_dirs = [p.replace("stdout.log", "checkpoints/") for p in log_files]
        delete_cps = functools.partial(cp.delete_checkpoints, keep=keep)
        num_cleaned = sum(process_pool.map(delete_cps, cp_dirs))
    print("Deleted {} total checkpoints".format(num_cleaned))