in distributed_training/src_dir/util.py [0:0]
def sync_local_checkpoints_to_s3(
        local_path="/opt/ml/checkpoints",
        s3_path=os.path.dirname(os.path.dirname(os.getenv('SM_MODULE_DIR', ''))) +
        '/checkpoints'):
    """Sync checkpoints from a local directory to an S3 location.

    Validates both arguments, checks that the target bucket is reachable
    with ``head_bucket``, and then delegates the copy to ``aws_s3_sync``.

    Args:
        local_path: Local path holding the checkpoints. Must exist.
        s3_path: Destination URI of the form ``s3://<bucket>[/<prefix>]``.
            The default is computed once, when this function is defined,
            from the ``SM_MODULE_DIR`` environment variable (two directory
            levels up, plus ``/checkpoints``). If that variable is unset the
            default is ``/checkpoints``, which is rejected as an invalid S3
            path.

    Raises:
        RuntimeError: If ``local_path`` does not exist, or if ``head_bucket``
            reports error code 404 for the bucket.
        ValueError: If ``s3_path`` does not start with ``s3://`` or names no
            bucket.
    """
    # Validate the cheap, purely local preconditions first.
    if not os.path.exists(local_path):
        raise RuntimeError(
            f"Provided local path {local_path} does not exist. Please check")

    # Require the scheme as a prefix (not merely somewhere in the string) and
    # strip only that prefix when extracting the bucket name.
    s3_scheme = 's3://'
    s3_bucket = ''
    if s3_path.startswith(s3_scheme):
        s3_bucket = s3_path[len(s3_scheme):].split('/', 1)[0]
    if not s3_bucket:
        raise ValueError(
            f"Provided s3 path {s3_path} is not valid. Please check")
    print(f"S3 Bucket: {s3_bucket}")

    # Imported here, as in the original, so the module itself does not
    # require boto3; placed after validation so bad arguments fail fast.
    import boto3
    import botocore.exceptions

    s3 = boto3.resource('s3')
    try:
        s3.meta.client.head_bucket(Bucket=s3_bucket)
    except botocore.exceptions.ClientError as e:
        error_code = e.response['Error']['Code']
        if error_code == '404':
            raise RuntimeError('S3 bucket does not exist. Please check') from e
        # Other codes (e.g. access denied on HeadBucket) are not treated as
        # fatal: keep the best-effort behaviour, but make it visible.
        print(f"Warning: could not verify S3 bucket {s3_bucket} "
              f"(error code {error_code}); attempting sync anyway")

    aws_s3_sync(local_path, s3_path)