in gslib/wildcard_iterator.py [0:0]
def __iter__(self,
             bucket_listing_fields=None,
             expand_top_level_buckets=False):
  """Iterates over the cloud URLs matched by this wildcard.

  In the case where no wildcard is present, yields a single matching object,
  single matching prefix, or one of each if both exist.

  Args:
    bucket_listing_fields: Iterable of fields to include in bucket listings,
        e.g. ['name', 'acl']. This iterator converts them to list-style
        format (['items/name', 'items/acl']) and adds the fields it needs
        for listing, such as prefixes. The API implementation is
        responsible for adding pagination fields. If this is None (or
        empty), no field restriction is passed and all fields are returned.
    expand_top_level_buckets: If true, yield no BUCKET references. Instead,
        expand buckets into top-level objects and prefixes.

  Yields:
    BucketListingRef of type BUCKET, OBJECT or PREFIX.

  Raises:
    CommandException: If a prefix returned by a listing itself contains a
        wildcard character.
  """
  single_version_request = self.wildcard_url.HasGeneration()
  # True when results must carry version information, either because all
  # versions were requested or because the URL names a specific generation.
  versioned = self.all_versions or single_version_request

  # For wildcard expansion purposes, we need at a minimum the name of
  # each object and prefix. If we're not using the default of requesting
  # all fields, make sure at least these are requested. The Cloud API
  # tolerates specifying the same field twice.
  get_fields = None
  if bucket_listing_fields:
    # get_fields keeps the caller's (get-style) names for single-object
    # metadata requests; bucket_listing_fields becomes the list-style set.
    get_fields = set(bucket_listing_fields)
    bucket_listing_fields = self._GetToListFields(
        get_fields=bucket_listing_fields)
    bucket_listing_fields.update(['items/name', 'prefixes'])
    get_fields.add('name')
    # If we're making versioned requests, ensure generation and
    # metageneration are also included.
    if versioned:
      bucket_listing_fields.update(
          ['items/generation', 'items/metageneration'])
      get_fields.update(['generation', 'metageneration'])

  # Handle bucket wildcarding, if any, in _ExpandBucketWildcards. Then
  # iterate over the expanded bucket strings and handle any object
  # wildcarding.
  for bucket_listing_ref in self._ExpandBucketWildcards(bucket_fields=['id']):
    bucket_url_string = bucket_listing_ref.url_string

    if self.wildcard_url.IsBucket():
      # IsBucket() guarantees there are no prefix or object wildcards, and
      # thus this is a top-level listing of buckets.
      if not expand_top_level_buckets:
        yield bucket_listing_ref
        continue
      url = StorageUrlFromString(bucket_url_string)
      for obj_or_prefix in self.gsutil_api.ListObjects(
          url.bucket_name,
          delimiter='/',
          all_versions=self.all_versions,
          provider=self.wildcard_url.scheme,
          fields=bucket_listing_fields):
        if obj_or_prefix.datatype == CloudApi.CsObjectOrPrefixType.OBJECT:
          yield self._GetObjectRef(bucket_url_string,
                                   obj_or_prefix.data,
                                   with_version=self.all_versions)
        else:  # CloudApi.CsObjectOrPrefixType.PREFIX
          yield self._GetPrefixRef(bucket_url_string, obj_or_prefix.data)
      continue

    # By default, assume a non-wildcarded URL is an object, not a prefix.
    # This prevents unnecessary listings (which are slower, more expensive,
    # and also subject to eventual consistency).
    if (not ContainsWildcard(self.wildcard_url.url_string) and
        self.wildcard_url.IsObject() and not self.all_versions):
      try:
        get_object = self.gsutil_api.GetObjectMetadata(
            self.wildcard_url.bucket_name,
            self.wildcard_url.object_name,
            generation=self.wildcard_url.generation,
            provider=self.wildcard_url.scheme,
            fields=get_fields)
        # all_versions is false on this path, so only an explicit
        # generation in the URL can make the ref versioned.
        yield self._GetObjectRef(self.wildcard_url.bucket_url_string,
                                 get_object,
                                 with_version=single_version_request)
        return
      except (NotFoundException, AccessDeniedException):
        # It's possible this is a prefix - try to list instead.
        pass

    # Expand iteratively by building prefix/delimiter bucket listing
    # request, filtering the results per the current level's wildcard
    # (if present), and continuing with the next component of the
    # wildcard. See _BuildBucketFilterStrings() documentation for details.
    if single_version_request:
      url_string = '%s%s#%s' % (bucket_url_string,
                                self.wildcard_url.object_name,
                                self.wildcard_url.generation)
    else:
      # Rstrip any prefixes to correspond with rstripped prefix wildcard
      # from _BuildBucketFilterStrings().
      url_string = '%s%s' % (
          bucket_url_string,
          StripOneSlash(self.wildcard_url.object_name) or
          '/')  # Cover root object named '/' case.

    # FIFO work list of URL strings that still have wildcard components.
    urls_needing_expansion = [url_string]
    while urls_needing_expansion:
      url = StorageUrlFromString(urls_needing_expansion.pop(0))
      (prefix, delimiter, prefix_wildcard,
       suffix_wildcard) = self._BuildBucketFilterStrings(url.object_name)
      regex_patterns = self._GetRegexPatterns(prefix_wildcard)

      # If we have a suffix wildcard, we only care about listing prefixes.
      listing_fields = ({'prefixes'}
                        if suffix_wildcard else bucket_listing_fields)

      # List bucket for objects matching prefix up to delimiter.
      for obj_or_prefix in self.gsutil_api.ListObjects(
          url.bucket_name,
          prefix=prefix,
          delimiter=delimiter,
          all_versions=versioned,
          provider=self.wildcard_url.scheme,
          fields=listing_fields):
        is_object = (
            obj_or_prefix.datatype == CloudApi.CsObjectOrPrefixType.OBJECT)
        # Only the first matching pattern counts; each branch breaks out of
        # the pattern loop once a pattern has matched.
        for pattern in regex_patterns:
          if is_object:
            gcs_object = obj_or_prefix.data
            if not pattern.match(gcs_object.name):
              continue
            # Yield only when nothing is left to expand past this level
            # and, for single-version requests, the generation matches.
            if ((not suffix_wildcard or
                 StripOneSlash(gcs_object.name) == suffix_wildcard) and
                (not single_version_request or
                 self._SingleVersionMatches(gcs_object.generation))):
              yield self._GetObjectRef(bucket_url_string,
                                       gcs_object,
                                       with_version=versioned)
            break
          else:  # CloudApi.CsObjectOrPrefixType.PREFIX
            listed_prefix = obj_or_prefix.data
            if ContainsWildcard(listed_prefix):
              # TODO: Disambiguate user-supplied strings from iterated
              # prefix and object names so that we can better reason
              # about wildcards and handle this case without raising
              # an error.
              raise CommandException(
                  'Cloud folder %s%s contains a wildcard; gsutil does '
                  'not currently support objects with wildcards in their '
                  'name.' % (bucket_url_string, listed_prefix))

            # If the prefix ends with a slash, remove it. Note that we only
            # remove one slash so that we can successfully enumerate dirs
            # containing multiple slashes.
            rstripped_prefix = StripOneSlash(listed_prefix)
            if not pattern.match(rstripped_prefix):
              continue
            if suffix_wildcard and rstripped_prefix != suffix_wildcard:
              # There's more wildcard left to expand.
              urls_needing_expansion.append(
                  '%s%s' % (bucket_url_string,
                            rstripped_prefix + '/' + suffix_wildcard))
            else:
              # No wildcard to expand, just yield the prefix.
              yield self._GetPrefixRef(bucket_url_string, listed_prefix)
            break