def ExpandUrlToSingleBlr()

in gslib/utils/copy_helper.py [0:0]


def ExpandUrlToSingleBlr(url_str,
                         gsutil_api,
                         project_id,
                         treat_nonexistent_object_as_subdir=False,
                         logger=None):
  """Expands wildcard if present in url_str.

  Args:
    url_str: String representation of requested url.
    gsutil_api: gsutil Cloud API instance to use.
    project_id: project ID to use (for iterators).
    treat_nonexistent_object_as_subdir: indicates if should treat a non-existent
        object as a subdir.
    logger: logging.Logger instance to use for output. If None, the root Logger
        will be used.

  Returns:
      (exp_url, have_existing_dst_container)
      where exp_url is a StorageUrl
      and have_existing_dst_container is a bool indicating whether
      exp_url names an existing directory, bucket, or bucket subdirectory.
      In the case where we match a subdirectory AND an object, the
      object is returned.

  Raises:
    CommandException: if a wildcarded url_str did not match exactly 1 URL
        (i.e. it matched either zero URLs or more than one).
  """
  # logging.Logger() cannot be constructed without a name; getLogger() with no
  # arguments returns the root logger, which is what the docstring promises.
  if logger is None:
    logger = logging.getLogger()

  # Handle wildcarded url case.
  if ContainsWildcard(url_str):
    blr_expansion = list(
        CreateWildcardIterator(url_str,
                               gsutil_api,
                               project_id=project_id,
                               logger=logger))
    if len(blr_expansion) != 1:
      raise CommandException('Destination (%s) must match exactly 1 URL' %
                             url_str)
    blr = blr_expansion[0]
    # BLR is either an OBJECT, PREFIX, or BUCKET; the latter two represent
    # directories.
    return (StorageUrlFromString(blr.url_string), not blr.IsObject())

  storage_url = StorageUrlFromString(url_str)

  # Handle non-wildcarded URL.
  if storage_url.IsFileUrl():
    return (storage_url, storage_url.IsDirectory())

  # At this point we have a cloud URL.
  if storage_url.IsBucket():
    return (storage_url, True)

  # For object/prefix URLs, there are four cases that indicate the destination
  # is a cloud subdirectory; these are always considered to be an existing
  # container. Checking each case allows gsutil to provide Unix-like
  # destination folder semantics, but requires up to three HTTP calls, noted
  # below.

  # Case 1: If a placeholder object ending with '/' exists.
  if IsCloudSubdirPlaceholder(storage_url):
    return (storage_url, True)

  # Get version of object name without trailing slash for matching prefixes
  prefix = storage_url.object_name.rstrip('/')

  # The names compared against each listing entry do not change while
  # iterating, so build them once up front.
  subdir_prefix = prefix + '/'
  object_name = storage_url.object_name
  folder_placeholder_name = object_name + '_$folder$'

  # HTTP call to make an eventually consistent check for a matching prefix,
  # _$folder$, or empty listing.
  list_iterator = gsutil_api.ListObjects(storage_url.bucket_name,
                                         prefix=prefix,
                                         delimiter='/',
                                         provider=storage_url.scheme,
                                         fields=['prefixes', 'items/name'])
  for obj_or_prefix in list_iterator:
    # To conserve HTTP calls for the common case, we make a single listing
    # that covers prefixes and object names. Listing object names covers the
    # _$folder$ case and the nonexistent-object-as-subdir case. However, if
    # there are many existing objects for which the target URL is an exact
    # prefix, this listing could be paginated and span multiple HTTP calls.
    # If this case becomes common, we could heuristically abort the
    # listing operation after the first page of results and just query for the
    # _$folder$ object directly using GetObjectMetadata.
    # TODO: currently the ListObjects iterator yields objects before prefixes,
    # because ls depends on this iteration order for proper display.  We could
    # save up to 1ms in determining that a destination is a prefix if we had a
    # way to yield prefixes first, but this would require poking a major hole
    # through the abstraction to control this iteration order.
    if (obj_or_prefix.datatype == CloudApi.CsObjectOrPrefixType.PREFIX and
        obj_or_prefix.data == subdir_prefix):
      # Case 2: If there is a matching prefix when listing the destination URL.
      return (storage_url, True)
    elif (obj_or_prefix.datatype == CloudApi.CsObjectOrPrefixType.OBJECT and
          obj_or_prefix.data.name == folder_placeholder_name):
      # Case 3: If a placeholder object matching destination + _$folder$
      # exists.
      return (storage_url, True)
    elif (obj_or_prefix.datatype == CloudApi.CsObjectOrPrefixType.OBJECT and
          obj_or_prefix.data.name == object_name):
      # The object exists but it is not a container
      return (storage_url, False)

  # Case 4: If no objects/prefixes matched, and nonexistent objects should be
  # treated as subdirectories.
  return (storage_url, treat_nonexistent_object_as_subdir)