in source/train.py [0:0]
def _save_model():
    """
    Copy the training artifacts from the output directory to the model directory.

    The following are copied into the directory named by ``SM_MODEL_DIR``:

    * ``model_final.pth`` from ``SM_OUTPUT_DATA_DIR``, stored as ``model.pth``;
    * the contents of ``/opt/ml/code/``, stored under ``code``;
    * ``config.yaml`` from ``SM_OUTPUT_DATA_DIR``;
    * ``last_checkpoint`` from ``SM_OUTPUT_DATA_DIR`` (optional, best effort).

    Sagemaker then automatically archives content of model directory
    and adds it to model registry once training job is completed.

    Raises:
        KeyError: if ``SM_MODEL_DIR`` or ``SM_OUTPUT_DATA_DIR`` is not set.
        OSError: if the model weights, the config file or the code directory
            cannot be copied (e.g. the source is missing, or the ``code``
            directory already exists in the model directory).
    """
    logger.info("Saving the model into model dir")
    model_dir = os.environ['SM_MODEL_DIR']
    output_dir = os.environ['SM_OUTPUT_DATA_DIR']

    # The final weights are stored under the name model.pth in the model dir.
    shutil.copyfile(
        os.path.join(output_dir, "model_final.pth"),
        os.path.join(model_dir, 'model.pth'),
    )

    # Ship the code directory alongside the weights.
    shutil.copytree('/opt/ml/code/', os.path.join(model_dir, 'code'))

    # The config file keeps its name.
    shutil.copyfile(
        os.path.join(output_dir, "config.yaml"),
        os.path.join(model_dir, "config.yaml"),
    )

    # The checkpoint file is optional: a failed copy must not fail the save.
    # Only filesystem errors are tolerated, so unrelated bugs still surface,
    # and the traceback is kept in the debug log to show the actual cause.
    checkpoint_path = os.path.join(output_dir, "last_checkpoint")
    new_checkpoint_path = os.path.join(model_dir, "last_checkpoint")
    try:
        shutil.copyfile(checkpoint_path, new_checkpoint_path)
    except OSError:
        logger.debug("D2 checkpoint file is not available.", exc_info=True)