in scripts/run_evaluation.py [0:0]
def run_evaluation(sweep_dir, force, Measures, ensembles_only, algorithms_only):
    """Submit one evaluation job per (model, measure) pair for a sweep.

    Scans ``sweep_dir`` for model subdirectories whose training and
    calibration have completed, then submits a ``workers.Evaluator`` job
    for every combination of surviving model and measure class, batched
    through a single slurm executor.

    Args:
        sweep_dir: root directory of the sweep; model dirs live directly under it.
        force: when True, resubmit a measure even if its ``.done`` trace exists.
        Measures: iterable of measure classes; one job per (model, Measure).
        ensembles_only: evaluate only ensemble models (also requests the
            larger-memory ``volta32gb`` nodes).
        algorithms_only: evaluate only single-algorithm models.

    Returns:
        ``(finished_jobs, jobs)`` as produced by ``utils.handle_jobs``.

    Raises:
        ValueError: if both ``ensembles_only`` and ``algorithms_only`` are set,
            or a model directory has no recognizable train config file.
    """
    # Validate mutually exclusive flags up front, before any filesystem work.
    if ensembles_only and algorithms_only:
        err_msg = 'only one of ensembles_only or algorithms_only can be selected at a time'
        raise ValueError(err_msg)
    sweep_path = Path(sweep_dir)
    clustering_path = sweep_path / 'clustering.pkl'
    # Keep only model directories whose training and calibration completed.
    models_paths = [el for el in sweep_path.iterdir() if el.is_dir()]
    models_paths = filter(utils.is_model, models_paths)
    models_paths = filter(utils.train_done, models_paths)
    models_paths = filter(utils.calibration_done, models_paths)
    evaluation_cfg = OmegaConf.create(EVALUATION_CFG)
    if algorithms_only:
        models_paths = list(filter(utils.is_algorithm, models_paths))
    if ensembles_only:
        models_paths = list(filter(utils.is_ensemble, models_paths))
        # Ensembles need the larger-memory GPU nodes.
        evaluation_cfg.slurm.constraint = 'volta32gb'
    executor = utils.get_slurm_executor(
        copy.deepcopy(evaluation_cfg).slurm,
        log_folder=str(sweep_path / 'logs' / 'run_evaluation'))
    # Constructing jobs — one per (model, measure) pair, in a single batch.
    jobs = []
    partitions = load_partitions(evaluation_cfg, clustering_file=str(clustering_path))
    with executor.batch():
        for model_path, Measure in itertools.product(models_paths, Measures):
            trace = f'ood_{Measure.__name__}'
            is_done = utils.trace_exists(f'{trace}.done', dir_=str(model_path))
            if is_done and not force:
                print(f'{Measure.__name__} is done. Skipping')
                continue
            train_cfg = _load_train_cfg(model_path)
            Algorithm = utils.load_model_cls(train_cfg)
            worker = workers.Evaluator()
            job = executor.submit(
                worker,
                str(model_path),
                evaluation_cfg,
                train_cfg,
                Algorithm,
                Measure,
                partitions)
            jobs.append(job)
            # Mark submission so concurrent runs see this measure as in flight.
            utils.write_trace(f'{trace}.pending', dir_=str(model_path))
    # Waiting for jobs to finish
    finished_jobs, jobs = utils.handle_jobs(jobs)
    return finished_jobs, jobs


def _load_train_cfg(model_path):
    """Return the training config for *model_path*, trying both known filenames.

    Raises:
        ValueError: if neither ``train_cfg.yaml`` nor ``cfg_rank_0.yaml`` exists.
    """
    for name in ('train_cfg.yaml', 'cfg_rank_0.yaml'):
        candidate = model_path / name
        if candidate.is_file():
            return utils.load_cfg(candidate)
    err_msg = 'train config not found'
    raise ValueError(err_msg)