in gslib/utils/copy_helper.py [0:0]
def ExpandUrlToSingleBlr(url_str,
                         gsutil_api,
                         project_id,
                         treat_nonexistent_object_as_subdir=False,
                         logger=None):
  """Expands wildcard if present in url_str.

  Args:
    url_str: String representation of requested url.
    gsutil_api: gsutil Cloud API instance to use.
    project_id: project ID to use (for iterators).
    treat_nonexistent_object_as_subdir: indicates if should treat a
        non-existent object as a subdir.
    logger: logging.Logger instance to use for output. If None, the root
        Logger will be used.

  Returns:
    (exp_url, have_existing_dst_container)
    where exp_url is a StorageUrl
    and have_existing_dst_container is a bool indicating whether
    exp_url names an existing directory, bucket, or bucket subdirectory.
    In the case where we match a subdirectory AND an object, the
    object is returned.

  Raises:
    CommandException: if url_str matched more than 1 URL.
  """
  # Bug fix: logging.Logger() requires a positional `name` argument, so the
  # previous `logging.Logger()` call raised TypeError whenever logger was
  # None. logging.getLogger() returns the root logger, matching the
  # documented default behavior above.
  logger = logger or logging.getLogger()

  # Handle wildcarded url case.
  if ContainsWildcard(url_str):
    blr_expansion = list(
        CreateWildcardIterator(url_str,
                               gsutil_api,
                               project_id=project_id,
                               logger=logger))
    if len(blr_expansion) != 1:
      raise CommandException('Destination (%s) must match exactly 1 URL' %
                             url_str)
    blr = blr_expansion[0]
    # BLR is either an OBJECT, PREFIX, or BUCKET; the latter two represent
    # directories.
    return (StorageUrlFromString(blr.url_string), not blr.IsObject())

  storage_url = StorageUrlFromString(url_str)

  # Handle non-wildcarded URL.
  if storage_url.IsFileUrl():
    return (storage_url, storage_url.IsDirectory())

  # At this point we have a cloud URL.
  if storage_url.IsBucket():
    return (storage_url, True)

  # For object/prefix URLs, there are four cases that indicate the destination
  # is a cloud subdirectory; these are always considered to be an existing
  # container. Checking each case allows gsutil to provide Unix-like
  # destination folder semantics, but requires up to three HTTP calls, noted
  # below.

  # Case 1: If a placeholder object ending with '/' exists.
  if IsCloudSubdirPlaceholder(storage_url):
    return (storage_url, True)

  # Get version of object name without trailing slash for matching prefixes.
  prefix = storage_url.object_name.rstrip('/')

  # HTTP call to make an eventually consistent check for a matching prefix,
  # _$folder$, or empty listing.
  list_iterator = gsutil_api.ListObjects(storage_url.bucket_name,
                                         prefix=prefix,
                                         delimiter='/',
                                         provider=storage_url.scheme,
                                         fields=['prefixes', 'items/name'])
  for obj_or_prefix in list_iterator:
    # To conserve HTTP calls for the common case, we make a single listing
    # that covers prefixes and object names. Listing object names covers the
    # _$folder$ case and the nonexistent-object-as-subdir case. However, if
    # there are many existing objects for which the target URL is an exact
    # prefix, this listing could be paginated and span multiple HTTP calls.
    # If this case becomes common, we could heuristically abort the
    # listing operation after the first page of results and just query for the
    # _$folder$ object directly using GetObjectMetadata.

    # TODO: currently the ListObjects iterator yields objects before prefixes,
    # because ls depends on this iteration order for proper display. We could
    # save up to 1ms in determining that a destination is a prefix if we had a
    # way to yield prefixes first, but this would require poking a major hole
    # through the abstraction to control this iteration order.
    if (obj_or_prefix.datatype == CloudApi.CsObjectOrPrefixType.PREFIX and
        obj_or_prefix.data == prefix + '/'):
      # Case 2: If there is a matching prefix when listing the destination URL.
      return (storage_url, True)
    elif (obj_or_prefix.datatype == CloudApi.CsObjectOrPrefixType.OBJECT and
          obj_or_prefix.data.name == storage_url.object_name + '_$folder$'):
      # Case 3: If a placeholder object matching destination + _$folder$
      # exists.
      return (storage_url, True)
    elif (obj_or_prefix.datatype == CloudApi.CsObjectOrPrefixType.OBJECT and
          obj_or_prefix.data.name == storage_url.object_name):
      # The object exists but it is not a container.
      return (storage_url, False)

  # Case 4: If no objects/prefixes matched, and nonexistent objects should be
  # treated as subdirectories.
  return (storage_url, treat_nonexistent_object_as_subdir)