in gslib/name_expansion.py [0:0]
def __iter__(self):
  """Iterates over all source URLs passed to the iterator.

  For each src url, expands wildcards, object-less bucket names,
  subdir bucket names, and directory names, and generates a flat listing of
  all the matching objects/files.

  You should instantiate this object using the static factory function
  NameExpansionIterator, because consumers of this iterator need the
  PluralityCheckableIterator wrapper built by that function.

  Yields:
    gslib.name_expansion.NameExpansionResult, or - when
    self.continue_on_error is set and a URL matches nothing - a
    (CommandException, stack_trace) tuple that the wrapping
    PluralityCheckableIterator knows how to buffer and re-raise.

  Raises:
    CommandException: if errors encountered.
  """
  for url_str in self.url_strs:
    storage_url = StorageUrlFromString(url_str)
    if (storage_url.IsFileUrl() and
        (storage_url.IsStream() or storage_url.IsFifo())):
      # Streams ("-") and named pipes can only be consumed once and cannot
      # be expanded, so they bypass the multi-step expansion below and are
      # yielded as-is. They are also incompatible with multiple sources.
      if self.url_strs.has_plurality:
        raise CommandException('Multiple URL strings are not supported '
                               'with streaming ("-") URLs or named pipes.')
      yield NameExpansionResult(source_storage_url=storage_url,
                                is_multi_source_request=False,
                                is_multi_top_level_source_request=False,
                                names_container=False,
                                expanded_storage_url=storage_url,
                                expanded_result=None)
      continue

    # Step 1: Expand any explicitly specified wildcards. The output from this
    # step is an iterator of BucketListingRef.
    # Starting with gs://buck*/abc* this step would expand to gs://bucket/abcd
    src_names_bucket = False
    if (storage_url.IsCloudUrl() and storage_url.IsBucket() and
        not self.recursion_requested):
      # UNIX commands like rm and cp will omit directory references.
      # If url_str refers only to buckets and we are not recursing,
      # then produce references of type BUCKET, because they are guaranteed
      # to pass through Step 2 and be omitted in Step 3.
      post_step1_iter = PluralityCheckableIterator(
          self.WildcardIterator(url_str).IterBuckets(bucket_fields=['id']))
    else:
      # Get a list of objects and prefixes, expanding the top level for
      # any listed buckets. If our source is a bucket, however, we need
      # to treat all of the top level expansions as names_container=True.
      post_step1_iter = PluralityCheckableIterator(
          self.WildcardIterator(url_str).IterAll(
              bucket_listing_fields=self.bucket_listing_fields,
              expand_top_level_buckets=True))
      if storage_url.IsCloudUrl() and storage_url.IsBucket():
        src_names_bucket = True

    # Computed before Step 2/3 filtering so that "top level" plurality
    # reflects the raw wildcard expansion, not the post-filter result.
    src_url_expands_to_multi = post_step1_iter.HasPlurality()
    is_multi_top_level_source_request = (self.url_strs.has_plurality or
                                         src_url_expands_to_multi)

    # Step 2: Expand bucket subdirs. The output from this
    # step is an iterator of (names_container, BucketListingRef).
    # Starting with gs://bucket/abcd this step would expand to:
    # iter([(True, abcd/o1.txt), (True, abcd/o2.txt)]).
    subdir_exp_wildcard = self._flatness_wildcard[self.recursion_requested]
    if self.recursion_requested:
      post_step2_iter = _ImplicitBucketSubdirIterator(
          self, post_step1_iter, subdir_exp_wildcard,
          self.bucket_listing_fields)
    else:
      post_step2_iter = _NonContainerTuplifyIterator(post_step1_iter)
    post_step2_iter = PluralityCheckableIterator(post_step2_iter)

    # Because we actually perform and check object listings here, this will
    # raise if url_args includes a non-existent object. However,
    # plurality_checkable_iterator will buffer the exception for us, not
    # raising it until the iterator is actually asked to yield the first
    # result.
    if post_step2_iter.IsEmpty():
      if self.continue_on_error:
        try:
          raise CommandException(NO_URLS_MATCHED_TARGET % url_str)
        except CommandException as e:
          # Yield a specialized tuple of (exception, stack_trace) to
          # the wrapping PluralityCheckableIterator.
          yield (e, sys.exc_info()[2])
      else:
        raise CommandException(NO_URLS_MATCHED_TARGET % url_str)

    # Step 3. Omit any directories, buckets, or bucket subdirectories for
    # non-recursive expansions.
    post_step3_iter = PluralityCheckableIterator(
        _OmitNonRecursiveIterator(post_step2_iter, self.recursion_requested,
                                  self.command_name,
                                  self.cmd_supports_recursion, self.logger))

    # Recomputed here (unlike the top-level flag above) because Step 3 may
    # have filtered entries, changing whether this src expands to multiple.
    src_url_expands_to_multi = post_step3_iter.HasPlurality()
    is_multi_source_request = (self.url_strs.has_plurality or
                               src_url_expands_to_multi)

    # Step 4. Expand directories and buckets. This step yields the iterated
    # values. Starting with gs://bucket this step would expand to:
    # [abcd/o1.txt, abcd/o2.txt, xyz/o1.txt, xyz/o2.txt]
    # Starting with file://dir this step would expand to:
    # [dir/a.txt, dir/b.txt, dir/c/]
    for (names_container, blr) in post_step3_iter:
      # A source that was itself a bucket names a container even when the
      # individual listing ref does not.
      src_names_container = src_names_bucket or names_container
      if blr.IsObject():
        yield NameExpansionResult(
            source_storage_url=storage_url,
            is_multi_source_request=is_multi_source_request,
            is_multi_top_level_source_request=
            is_multi_top_level_source_request,
            names_container=src_names_container,
            expanded_storage_url=blr.storage_url,
            expanded_result=blr.root_object)
      else:
        # Use implicit wildcarding to do the enumeration.
        # At this point we are guaranteed that:
        # - Recursion has been requested because non-object entries are
        #   filtered in step 3 otherwise.
        # - This is a prefix or bucket subdirectory because only
        #   non-recursive iterations produce bucket references.
        expanded_url = StorageUrlFromString(blr.url_string)
        if expanded_url.IsFileUrl():
          # Convert dir to implicit recursive wildcard.
          url_to_iterate = '%s%s%s' % (blr, os.sep, subdir_exp_wildcard)
        else:
          # Convert subdir to implicit recursive wildcard.
          url_to_iterate = expanded_url.CreatePrefixUrl(
              wildcard_suffix=subdir_exp_wildcard)
        wc_iter = PluralityCheckableIterator(
            self.WildcardIterator(url_to_iterate).IterObjects(
                bucket_listing_fields=self.bucket_listing_fields))
        # Plurality may grow as the implicit wildcard enumerates objects,
        # so the multi-source flag is refreshed before these yields.
        src_url_expands_to_multi = (src_url_expands_to_multi or
                                    wc_iter.HasPlurality())
        is_multi_source_request = (self.url_strs.has_plurality or
                                   src_url_expands_to_multi)
        # This will be a flattened listing of all underlying objects in the
        # subdir.
        for blr in wc_iter:
          yield NameExpansionResult(
              source_storage_url=storage_url,
              is_multi_source_request=is_multi_source_request,
              is_multi_top_level_source_request=(
                  is_multi_top_level_source_request),
              names_container=True,
              expanded_storage_url=blr.storage_url,
              expanded_result=blr.root_object)