in src/sagemaker_core/helper/session_helper.py [0:0]
def upload_data(self, path, bucket=None, key_prefix="data", callback=None, extra_args=None):
    """Upload local file or directory to S3.

    If a single file is specified for upload, the resulting S3 object key is
    ``{key_prefix}/{filename}`` (filename does not include the local path, if any specified).

    If a directory is specified for upload, the API uploads all content, recursively,
    preserving relative structure of subdirectories. The resulting object key names are:
    ``{key_prefix}/{relative_subdirectory_path}/filename``. The sub-directory part of the
    key is always delimited with ``/``, regardless of the local OS path separator.

    Args:
        path (str): Path (absolute or relative) of local file or directory to upload.
        bucket (str): Name of the S3 Bucket to upload to (default: None). If not specified, the
            default bucket of the ``Session`` is used (if default bucket does not exist, the
            ``Session`` creates it).
        key_prefix (str): Optional S3 object key name prefix (default: 'data'). S3 uses the
            prefix to create a directory structure for the bucket content that it displays in
            the S3 console.
        callback (callable): Optional callable forwarded unchanged as the ``Callback``
            argument of every ``upload_file`` call (default: None). The same callable is
            shared by all files when a directory is uploaded.
        extra_args (dict): Optional extra arguments that may be passed to the upload operation.
            Similar to ExtraArgs parameter in S3 upload_file function. Please refer to the
            ExtraArgs parameter documentation here:
            https://boto3.amazonaws.com/v1/documentation/api/latest/guide/s3-uploading-files.html#the-extraargs-parameter

    Returns:
        str: The S3 URI of the uploaded file(s). If a file is specified in the path argument,
            the URI format is: ``s3://{bucket name}/{key_prefix}/{original_file_name}``.
            If a directory is specified in the path argument, the URI format is
            ``s3://{bucket name}/{key_prefix}``.
    """
    bucket, key_prefix = self.determine_bucket_and_prefix(
        bucket=bucket, key_prefix=key_prefix, sagemaker_session=self
    )

    # Generate a tuple for each file that we want to upload of the form (local_path, s3_key).
    files = []
    key_suffix = None
    if os.path.isdir(path):
        for dirpath, _, filenames in os.walk(path):
            # The relative prefix only depends on the directory, so compute it once per
            # directory instead of once per file.
            if path == dirpath:
                s3_relative_prefix = ""
            else:
                relative_dir = os.path.relpath(dirpath, start=path)
                # os.path.relpath uses the local separator ("\\" on Windows); S3 keys use
                # "/" as the hierarchy delimiter, so normalise before building the key.
                s3_relative_prefix = relative_dir.replace(os.sep, "/") + "/"
            for name in filenames:
                local_path = os.path.join(dirpath, name)
                s3_key = "{}/{}{}".format(key_prefix, s3_relative_prefix, name)
                files.append((local_path, s3_key))
    else:
        _, name = os.path.split(path)
        s3_key = "{}/{}".format(key_prefix, name)
        files.append((path, s3_key))
        key_suffix = name

    # Reuse the session's S3 resource when one is set; otherwise create one from the
    # boto session for the session's region.
    if self.s3_resource is None:
        s3 = self.boto_session.resource("s3", region_name=self.boto_region_name)
    else:
        s3 = self.s3_resource

    for local_path, s3_key in files:
        s3.Object(bucket, s3_key).upload_file(
            local_path, Callback=callback, ExtraArgs=extra_args
        )

    s3_uri = "s3://{}/{}".format(bucket, key_prefix)
    # If a specific file was used as input (instead of a directory), we return the full S3 key
    # of the uploaded object. This prevents unintentionally using other files under the same
    # prefix during training.
    if key_suffix:
        s3_uri = "{}/{}".format(s3_uri, key_suffix)
    return s3_uri