in src/sagemaker/local/image.py [0:0]
def retrieve_artifacts(self, compose_data, output_data_config, job_name):
    """Collect and package the training artifacts produced by every container.

    Called once training has finished. For each host, the directories
    mounted at ``/opt/ml/model`` and ``/opt/ml/output`` are copied into a
    shared staging area, archived as ``model.tar.gz`` and ``output.tar.gz``,
    and then either left under the container root or handed to
    ``move_to_destination``. Like the official SageMaker Training Service,
    files that share a name across containers override one another.

    Args:
        compose_data (dict): Docker-Compose configuration in dictionary
            format. The ``["services"][<host>]["volumes"]`` entries are
            read as ``host_dir:container_dir`` strings, optionally
            suffixed with ``:z``.
        output_data_config: The configuration of the output data. Its
            ``"S3OutputPath"`` entry decides where the archives end up.
        job_name: The name of the job.

    Returns:
        The location of ``model.tar.gz``: a ``file://`` path inside the
        container root when ``S3OutputPath`` is empty, otherwise the value
        returned by ``move_to_destination`` joined with ``model.tar.gz``.
    """
    # Two working areas live under the container root: a staging tree that
    # gathers the raw files from every node, and a directory that holds the
    # final compressed archives.
    staging_root = os.path.join(self.container_root, "artifacts")
    archive_dir = os.path.join(self.container_root, "compressed_artifacts")
    model_dir = os.path.join(staging_root, "model")
    output_dir = os.path.join(staging_root, "output")
    for new_dir in (staging_root, model_dir, output_dir, archive_dir):
        os.mkdir(new_dir)

    # Container mount point -> staging directory that receives its contents.
    copy_targets = {"/opt/ml/model": model_dir, "/opt/ml/output": output_dir}

    # Gather the artifacts from all nodes into the staging directories.
    for host in self.hosts:
        service = compose_data["services"][str(host)]
        # Drop a trailing ":z" flag so only the path components remain.
        mounts = [m[:-2] if m.endswith(":z") else m for m in service["volumes"]]
        for mount in mounts:
            if re.search(r"^[A-Za-z]:", mount):
                # The mapping starts with a single letter and a colon
                # (presumably a Windows drive letter), so that first colon
                # belongs to the host path rather than separating fields.
                drive, drive_path, container_dir = mount.split(":")
                host_dir = ":".join((drive, drive_path))
            else:
                host_dir, container_dir = mount.split(":")

            target = copy_targets.get(container_dir)
            if target is not None:
                sagemaker.local.utils.recursive_copy(host_dir, target)

    # List both staging directories first, then build the two archives.
    model_members = [os.path.join(model_dir, entry) for entry in os.listdir(model_dir)]
    output_members = [os.path.join(output_dir, entry) for entry in os.listdir(output_dir)]
    for members, tarball in (
        (model_members, "model.tar.gz"),
        (output_members, "output.tar.gz"),
    ):
        sagemaker.utils.create_tar_file(members, os.path.join(archive_dir, tarball))

    s3_output_path = output_data_config["S3OutputPath"]
    if s3_output_path != "":
        # Move the compressed artifacts to wherever they are required.
        output_data = sagemaker.local.utils.move_to_destination(
            archive_dir,
            s3_output_path,
            job_name,
            self.sagemaker_session,
            prefix="output",
        )
    else:
        # No destination configured: the archives stay where they were built.
        output_data = "file://%s" % archive_dir

    # The uncompressed copies are no longer needed once the tarballs exist.
    for leftover in (model_dir, output_dir):
        _delete_tree(leftover)

    return os.path.join(output_data, "model.tar.gz")