in lib/utils/checkpoints_rel.py [0:0]
def load_model_from_params_file(model, params_file, checkpoint_dir=None):
    """Initialize ``model`` from ``params_file`` or from a resume checkpoint.

    A checkpoint found in ``checkpoint_dir`` takes precedence over
    ``params_file``: in case of cluster failure, training should resume from
    the last checkpoint rather than from the params file, if one is given.

    Args:
        model: model handed through to ``initialize_params_from_file``.
        params_file: path of the weights file to initialize from. May be
            falsy or point to a file that does not exist.
        checkpoint_dir: optional directory scanned for checkpoint files,
            i.e. entries whose name contains ``c2_model_iter``.

    Returns:
        A ``(start_model_iter, prev_lr, checkpoint_exists)`` tuple.
        ``start_model_iter`` and ``prev_lr`` are the values returned by
        ``initialize_params_from_file``, or ``0`` and ``None`` when no
        parameters were loaded. ``checkpoint_exists`` tells whether a
        checkpoint file was found in ``checkpoint_dir``.

    Raises:
        OSError: propagated from ``os.listdir`` when ``checkpoint_dir`` is
            given but cannot be listed.
    """
    # A checkpoint is any directory entry whose name contains 'c2_model_iter'.
    checkpoint_exists = checkpoint_dir is not None and any(
        'c2_model_iter' in name for name in os.listdir(checkpoint_dir)
    )

    # Defaults for every path that loads nothing. Previously
    # start_model_iter was only bound inside the branches below, so the
    # final return could raise UnboundLocalError.
    start_model_iter = 0
    prev_lr = None

    if params_file and os.path.exists(params_file) and not checkpoint_exists:
        logger.info('Initializing model parameters from {}'.format(params_file))
        start_model_iter, prev_lr = initialize_params_from_file(
            model=model, weights_file=params_file, num_devices=cfg.NUM_DEVICES,
        )
    elif cfg.CHECKPOINT.RESUME and cfg.CHECKPOINT.CHECKPOINT_MODEL:
        # Resume path: params_file is rebound to the checkpoint to resume
        # from; the defaults above stay in effect if that file is missing.
        params_file = get_checkpoint_resume_file(checkpoint_dir)
        if params_file is not None and os.path.exists(params_file):
            start_model_iter, prev_lr = initialize_params_from_file(
                model=model, weights_file=params_file, num_devices=cfg.NUM_DEVICES,
            )
    else:
        # NOTE(review): this else is assumed to pair with the outer if/elif
        # (neither an initial params file nor resume applies) -- confirm.
        logger.info('Params file does not exist: {}'.format(params_file))
    return start_model_iter, prev_lr, checkpoint_exists