def __iter__()

in gslib/wildcard_iterator.py [0:0]


  def __iter__(self,
               bucket_listing_fields=None,
               expand_top_level_buckets=False):
    """Expands the cloud wildcard URL and yields each matching reference.

    When the URL contains no wildcard, this yields a single matching object,
    a single matching prefix, or one of each if both exist.

    Args:
      bucket_listing_fields: Iterable of object metadata fields to request,
                             e.g. ['name', 'acl'], or None to request all
                             fields.  This method converts them to the
                             list-style form (['items/name', 'items/acl'])
                             and adds the fields that listing itself needs,
                             such as prefixes.  Pagination fields are left
                             to the API implementation.
      expand_top_level_buckets: If true, no BUCKET references are yielded;
                                each bucket is instead expanded into its
                                top-level objects and prefixes.

    Yields:
      BucketListingRef of type BUCKET, OBJECT or PREFIX.

    Raises:
      CommandException: if a prefix returned by a listing itself contains a
                        wildcard.
    """
    is_single_version = self.wildcard_url.HasGeneration()

    # Wildcard expansion needs, at a minimum, the name of every object and
    # prefix.  When the caller restricts the field set, make sure those are
    # still requested; the Cloud API tolerates the same field appearing twice.
    metadata_fields = None
    if bucket_listing_fields:
      metadata_fields = set(bucket_listing_fields)
      bucket_listing_fields = self._GetToListFields(
          get_fields=bucket_listing_fields)
      bucket_listing_fields.update(['items/name', 'prefixes'])
      metadata_fields.add('name')
      # Versioned requests additionally need generation and metageneration.
      if is_single_version or self.all_versions:
        bucket_listing_fields.update(
            ['items/generation', 'items/metageneration'])
        metadata_fields.update(['generation', 'metageneration'])

    # _ExpandBucketWildcards resolves any bucket-level wildcard; object-level
    # wildcards are then resolved separately for each expanded bucket string.
    for bucket_ref in self._ExpandBucketWildcards(bucket_fields=['id']):
      bucket_url_string = bucket_ref.url_string

      if self.wildcard_url.IsBucket():
        # IsBucket() guarantees there are no prefix or object wildcards, so
        # this is a top-level listing of buckets.
        if not expand_top_level_buckets:
          yield bucket_ref
          continue
        bucket_url = StorageUrlFromString(bucket_url_string)
        for entry in self.gsutil_api.ListObjects(
            bucket_url.bucket_name,
            delimiter='/',
            all_versions=self.all_versions,
            provider=self.wildcard_url.scheme,
            fields=bucket_listing_fields):
          if entry.datatype == CloudApi.CsObjectOrPrefixType.OBJECT:
            yield self._GetObjectRef(bucket_url_string,
                                     entry.data,
                                     with_version=self.all_versions)
          else:  # CloudApi.CsObjectOrPrefixType.PREFIX
            yield self._GetPrefixRef(bucket_url_string, entry.data)
        continue

      # A non-wildcarded URL is assumed to name an object rather than a
      # prefix.  A direct metadata fetch avoids a listing, which is slower,
      # more expensive, and subject to eventual consistency.
      is_plain_object_url = (
          not ContainsWildcard(self.wildcard_url.url_string) and
          self.wildcard_url.IsObject() and not self.all_versions)
      if is_plain_object_url:
        try:
          object_metadata = self.gsutil_api.GetObjectMetadata(
              self.wildcard_url.bucket_name,
              self.wildcard_url.object_name,
              generation=self.wildcard_url.generation,
              provider=self.wildcard_url.scheme,
              fields=metadata_fields)
          yield self._GetObjectRef(
              self.wildcard_url.bucket_url_string,
              object_metadata,
              with_version=(self.all_versions or is_single_version))
          return
        except (NotFoundException, AccessDeniedException):
          # The name may refer to a prefix instead; fall through to listing.
          pass

      # Expand one wildcard component at a time: issue a prefix/delimiter
      # listing, filter the results against the current component's wildcard
      # (if any), and queue whatever still has wildcard left to expand.  See
      # the _BuildBucketFilterStrings() documentation for details.
      if is_single_version:
        start_url_string = '%s%s#%s' % (bucket_url_string,
                                        self.wildcard_url.object_name,
                                        self.wildcard_url.generation)
      else:
        # Strip one trailing slash so the name lines up with the rstripped
        # prefix wildcard produced by _BuildBucketFilterStrings().  The
        # '/' fallback covers a root object that is itself named '/'.
        stripped_name = StripOneSlash(self.wildcard_url.object_name)
        start_url_string = '%s%s' % (bucket_url_string, stripped_name or '/')

      pending_urls = [start_url_string]
      while pending_urls:
        expansion_url = StorageUrlFromString(pending_urls.pop(0))
        (list_prefix, delimiter, prefix_wildcard, suffix_wildcard) = (
            self._BuildBucketFilterStrings(expansion_url.object_name))
        regex_patterns = self._GetRegexPatterns(prefix_wildcard)

        # With a suffix wildcard still pending, only prefixes matter at this
        # level, so only prefixes are requested.
        if suffix_wildcard:
          listing_fields = set(['prefixes'])
        else:
          listing_fields = bucket_listing_fields

        # List the bucket for entries matching the prefix up to the delimiter.
        for entry in self.gsutil_api.ListObjects(
            expansion_url.bucket_name,
            prefix=list_prefix,
            delimiter=delimiter,
            all_versions=self.all_versions or is_single_version,
            provider=self.wildcard_url.scheme,
            fields=listing_fields):
          # Each entry is handled by the first pattern that matches it.
          for pattern in regex_patterns:
            if entry.datatype == CloudApi.CsObjectOrPrefixType.OBJECT:
              cloud_object = entry.data
              if not pattern.match(cloud_object.name):
                continue
              name_matches = (
                  not suffix_wildcard or
                  StripOneSlash(cloud_object.name) == suffix_wildcard)
              if name_matches and (
                  not is_single_version or
                  self._SingleVersionMatches(cloud_object.generation)):
                yield self._GetObjectRef(
                    bucket_url_string,
                    cloud_object,
                    with_version=(self.all_versions or is_single_version))
              break
            else:  # CloudApi.CsObjectOrPrefixType.PREFIX
              listed_prefix = entry.data

              if ContainsWildcard(listed_prefix):
                # TODO: Disambiguate user-supplied strings from iterated
                # prefix and object names so that we can better reason
                # about wildcards and handle this case without raising
                # an error.
                raise CommandException(
                    'Cloud folder %s%s contains a wildcard; gsutil does '
                    'not currently support objects with wildcards in their '
                    'name.' % (bucket_url_string, listed_prefix))

              # Drop at most one trailing slash, so that directories whose
              # names contain multiple slashes can still be enumerated.
              trimmed_prefix = StripOneSlash(listed_prefix)
              if not pattern.match(trimmed_prefix):
                continue
              if suffix_wildcard and trimmed_prefix != suffix_wildcard:
                # More wildcard remains below this prefix; queue it.
                pending_urls.append(
                    '%s%s' % (bucket_url_string,
                              trimmed_prefix + '/' + suffix_wildcard))
              else:
                # Nothing left to expand, so the prefix itself is a result.
                yield self._GetPrefixRef(bucket_url_string, listed_prefix)
              break