def __iter__()

in gslib/name_expansion.py [0:0]


  def __iter__(self):
    """Iterates over all source URLs passed to the iterator.

    For each src url, expands wildcards, object-less bucket names,
    subdir bucket names, and directory names, and generates a flat listing of
    all the matching objects/files.

    You should instantiate this object using the static factory function
    NameExpansionIterator, because consumers of this iterator need the
    PluralityCheckableIterator wrapper built by that function.

    Yields:
      gslib.name_expansion.NameExpansionResult.

    Raises:
      CommandException: if errors encountered.
    """
    for url_str in self.url_strs:
      storage_url = StorageUrlFromString(url_str)

      if (storage_url.IsFileUrl() and
          (storage_url.IsStream() or storage_url.IsFifo())):
        if self.url_strs.has_plurality:
          raise CommandException('Multiple URL strings are not supported '
                                 'with streaming ("-") URLs or named pipes.')
        yield NameExpansionResult(source_storage_url=storage_url,
                                  is_multi_source_request=False,
                                  is_multi_top_level_source_request=False,
                                  names_container=False,
                                  expanded_storage_url=storage_url,
                                  expanded_result=None)
        continue

      # Step 1: Expand any explicitly specified wildcards. The output from this
      # step is an iterator of BucketListingRef.
      # Starting with gs://buck*/abc* this step would expand to gs://bucket/abcd

      src_names_bucket = False
      if (storage_url.IsCloudUrl() and storage_url.IsBucket() and
          not self.recursion_requested):
        # UNIX commands like rm and cp will omit directory references.
        # If url_str refers only to buckets and we are not recursing,
        # then produce references of type BUCKET, because they are guaranteed
        # to pass through Step 2 and be omitted in Step 3.
        post_step1_iter = PluralityCheckableIterator(
            self.WildcardIterator(url_str).IterBuckets(bucket_fields=['id']))
      else:
        # Get a list of objects and prefixes, expanding the top level for
        # any listed buckets.  If our source is a bucket, however, we need
        # to treat all of the top level expansions as names_container=True.
        post_step1_iter = PluralityCheckableIterator(
            self.WildcardIterator(url_str).IterAll(
                bucket_listing_fields=self.bucket_listing_fields,
                expand_top_level_buckets=True))
        if storage_url.IsCloudUrl() and storage_url.IsBucket():
          src_names_bucket = True

      src_url_expands_to_multi = post_step1_iter.HasPlurality()
      is_multi_top_level_source_request = (self.url_strs.has_plurality or
                                           src_url_expands_to_multi)

      # Step 2: Expand bucket subdirs. The output from this
      # step is an iterator of (names_container, BucketListingRef).
      # Starting with gs://bucket/abcd this step would expand to:
      #   iter([(True, abcd/o1.txt), (True, abcd/o2.txt)]).
      subdir_exp_wildcard = self._flatness_wildcard[self.recursion_requested]
      if self.recursion_requested:
        post_step2_iter = _ImplicitBucketSubdirIterator(
            self, post_step1_iter, subdir_exp_wildcard,
            self.bucket_listing_fields)
      else:
        post_step2_iter = _NonContainerTuplifyIterator(post_step1_iter)
      post_step2_iter = PluralityCheckableIterator(post_step2_iter)

      # Because we actually perform and check object listings here, this will
      # raise if url_args includes a non-existent object.  However,
      # plurality_checkable_iterator will buffer the exception for us, not
      # raising it until the iterator is actually asked to yield the first
      # result.
      if post_step2_iter.IsEmpty():
        if self.continue_on_error:
          try:
            raise CommandException(NO_URLS_MATCHED_TARGET % url_str)
          except CommandException as e:
            # Yield a specialized tuple of (exception, stack_trace) to
            # the wrapping PluralityCheckableIterator.
            yield (e, sys.exc_info()[2])
        else:
          raise CommandException(NO_URLS_MATCHED_TARGET % url_str)

      # Step 3. Omit any directories, buckets, or bucket subdirectories for
      # non-recursive expansions.
      post_step3_iter = PluralityCheckableIterator(
          _OmitNonRecursiveIterator(post_step2_iter, self.recursion_requested,
                                    self.command_name,
                                    self.cmd_supports_recursion, self.logger))

      src_url_expands_to_multi = post_step3_iter.HasPlurality()
      is_multi_source_request = (self.url_strs.has_plurality or
                                 src_url_expands_to_multi)

      # Step 4. Expand directories and buckets. This step yields the iterated
      # values. Starting with gs://bucket this step would expand to:
      #  [abcd/o1.txt, abcd/o2.txt, xyz/o1.txt, xyz/o2.txt]
      # Starting with file://dir this step would expand to:
      #  [dir/a.txt, dir/b.txt, dir/c/]
      for (names_container, blr) in post_step3_iter:
        src_names_container = src_names_bucket or names_container
        if blr.IsObject():
          yield NameExpansionResult(
              source_storage_url=storage_url,
              is_multi_source_request=is_multi_source_request,
              is_multi_top_level_source_request=
              is_multi_top_level_source_request,
              names_container=src_names_container,
              expanded_storage_url=blr.storage_url,
              expanded_result=blr.root_object)
        else:
          # Use implicit wildcarding to do the enumeration.
          # At this point we are guaranteed that:
          # - Recursion has been requested because non-object entries are
          #   filtered in step 3 otherwise.
          # - This is a prefix or bucket subdirectory because only
          #   non-recursive iterations product bucket references.
          expanded_url = StorageUrlFromString(blr.url_string)
          if expanded_url.IsFileUrl():
            # Convert dir to implicit recursive wildcard.
            url_to_iterate = '%s%s%s' % (blr, os.sep, subdir_exp_wildcard)
          else:
            # Convert subdir to implicit recursive wildcard.
            url_to_iterate = expanded_url.CreatePrefixUrl(
                wildcard_suffix=subdir_exp_wildcard)

          wc_iter = PluralityCheckableIterator(
              self.WildcardIterator(url_to_iterate).IterObjects(
                  bucket_listing_fields=self.bucket_listing_fields))
          src_url_expands_to_multi = (src_url_expands_to_multi or
                                      wc_iter.HasPlurality())
          is_multi_source_request = (self.url_strs.has_plurality or
                                     src_url_expands_to_multi)
          # This will be a flattened listing of all underlying objects in the
          # subdir.
          for blr in wc_iter:
            yield NameExpansionResult(
                source_storage_url=storage_url,
                is_multi_source_request=is_multi_source_request,
                is_multi_top_level_source_request=(
                    is_multi_top_level_source_request),
                names_container=True,
                expanded_storage_url=blr.storage_url,
                expanded_result=blr.root_object)