in gslib/utils/copy_helper.py
def ConstructDstUrl(src_url,
exp_src_url,
src_url_names_container,
have_multiple_srcs,
has_multiple_top_level_srcs,
exp_dst_url,
have_existing_dest_subdir,
recursion_requested,
preserve_posix=False):
"""Constructs the destination URL for a given exp_src_url/exp_dst_url pair.
Uses context-dependent naming rules that mimic Linux cp and mv behavior.
Args:
src_url: Source StorageUrl to be copied.
exp_src_url: Single StorageUrl from wildcard expansion of src_url.
src_url_names_container: True if src_url names a container (including the
case of a wildcard-named bucket subdir (like gs://bucket/abc,
where gs://bucket/abc/* matched some objects).
have_multiple_srcs: True if this is a multi-source request. This can be
true if src_url wildcard-expanded to multiple URLs or if there were
multiple source URLs in the request.
has_multiple_top_level_srcs: Same as have_multiple_srcs but measured
before recursion.
exp_dst_url: the expanded StorageUrl requested for the cp destination.
Final written path is constructed from this plus a context-dependent
variant of src_url.
have_existing_dest_subdir: bool indicator whether dest is an existing
subdirectory.
recursion_requested: True if a recursive operation has been requested.
preserve_posix: True if preservation of posix attributes has been requested.
Returns:
StorageUrl to use for copy.
Raises:
CommandException if destination object name not specified for
source and source is a stream.
"""
if (exp_dst_url.IsFileUrl() and exp_dst_url.IsStream() and preserve_posix):
raise CommandException('Cannot preserve POSIX attributes with a stream.')
if _ShouldTreatDstUrlAsSingleton(src_url_names_container, have_multiple_srcs,
have_existing_dest_subdir, exp_dst_url,
recursion_requested):
# We're copying one file or object to one file or object.
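    # Example: "gsutil cp dir/f.txt gs://bucket/obj" writes exactly
    # gs://bucket/obj.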
return exp_dst_url
if exp_src_url.IsFileUrl() and (exp_src_url.IsStream() or
exp_src_url.IsFifo()):
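    # Streams and named pipes have no final path component to borrow a name
    # from, so the destination must name the object explicitly.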
if have_existing_dest_subdir:
type_text = 'stream' if exp_src_url.IsStream() else 'named pipe'
raise CommandException('Destination object name needed when '
'source is a %s' % type_text)
return exp_dst_url
if not recursion_requested and not have_multiple_srcs:
# We're copying one file or object to a subdirectory. Append final comp
# of exp_src_url to exp_dst_url.
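    # Example: "gsutil cp gs://bucket/d1/obj dir" (where dir exists) writes
    # dir/obj.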
src_final_comp = exp_src_url.object_name.rpartition(src_url.delim)[-1]
    return StorageUrlFromString(
        '%s%s%s' % (exp_dst_url.url_string.rstrip(exp_dst_url.delim),
                    exp_dst_url.delim, src_final_comp))
# Else we're copying multiple sources to a directory, bucket, or a bucket
# "sub-directory".
# Ensure exp_dst_url ends in delim char if we're doing a multi-src copy or
# a copy to a directory. (The check for copying to a directory needs
# special-case handling so that the command:
# gsutil cp gs://bucket/obj dir
# will turn into file://dir/ instead of file://dir -- the latter would cause
# the file "dirobj" to be created.)
# Note: need to check have_multiple_srcs or src_url.names_container()
# because src_url could be a bucket containing a single object, named
# as gs://bucket.
if ((have_multiple_srcs or src_url_names_container or
(exp_dst_url.IsFileUrl() and exp_dst_url.IsDirectory())) and
not exp_dst_url.url_string.endswith(exp_dst_url.delim)):
exp_dst_url = StorageUrlFromString(
'%s%s' % (exp_dst_url.url_string, exp_dst_url.delim))
src_url_is_valid_parent = _IsUrlValidParentDir(src_url)
if not src_url_is_valid_parent and has_multiple_top_level_srcs:
# To avoid top-level name conflicts, we need to copy the parent dir.
# However, that cannot be done because the parent dir has an invalid name.
raise InvalidUrlError(
'Presence of multiple top-level sources and invalid expanded URL'
' make file name conflicts possible for URL: {}'.format(src_url))
# Making naming behavior match how things work with local Linux cp and mv
# operations depends on many factors, including whether the destination is a
# container, and the plurality of the source(s).
# 1. Recursively copying from directories, buckets, or bucket subdirs should
# result in objects/files mirroring the source hierarchy. For example:
# gsutil cp -r dir1/dir2 gs://bucket
  # should create the object gs://bucket/dir2/file2 (assuming dir1/dir2
  # contains file2).
#
# To be consistent with Linux cp behavior, there's one more wrinkle when
# working with subdirs: The resulting object names depend on whether the
# destination subdirectory exists. For example, if gs://bucket/subdir
# exists, the command:
# gsutil cp -r dir1/dir2 gs://bucket/subdir
# should create objects named like gs://bucket/subdir/dir2/a/b/c. In
# contrast, if gs://bucket/subdir does not exist, this same command
# should create objects named like gs://bucket/subdir/a/b/c.
#
# If there are multiple top-level source items, preserve source parent
# dirs. This is similar to when the destination dir already exists and
# avoids conflicts such as "dir1/f.txt" and "dir2/f.txt" both getting
# copied to "gs://bucket/f.txt". Linux normally errors on these conflicts,
# but we cannot do that because we need to give users the ability to create
# dirs as they copy to the cloud.
#
# Note: "mv" is similar to running "cp -r" followed by source deletion.
#
# 2. Copying individual files or objects to dirs, buckets or bucket subdirs
# should result in objects/files named by the final source file name
# component. Example:
# gsutil cp dir1/*.txt gs://bucket
# should create the objects gs://bucket/f1.txt and gs://bucket/f2.txt,
# assuming dir1 contains f1.txt and f2.txt.
# Ignore the "multiple top-level sources" rule if using double wildcard **
# because that treats all files as top-level, in which case the user doesn't
# want to preserve directories.
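  # Example: "gsutil cp -r dir/** gs://bucket" treats every file under dir as
  # top-level, so dir/sub/f.txt lands at gs://bucket/f.txt rather than
  # gs://bucket/sub/f.txt.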
preserve_src_top_level_dirs = ('**' not in src_url.versionless_url_string and
src_url_is_valid_parent and
(has_multiple_top_level_srcs or
have_existing_dest_subdir))
if preserve_src_top_level_dirs or (src_url_names_container and
(exp_dst_url.IsCloudUrl() or
exp_dst_url.IsDirectory())):
# Case 1. Container copy to a destination other than a file.
# Build dst_key_name from subpath of exp_src_url past
# where src_url ends. For example, for src_url=gs://bucket/ and
# exp_src_url=gs://bucket/src_subdir/obj, dst_key_name should be
# src_subdir/obj.
src_url_path_sans_final_dir = GetPathBeforeFinalDir(src_url, exp_src_url)
dst_key_name = exp_src_url.versionless_url_string[
len(src_url_path_sans_final_dir):].lstrip(src_url.delim)
if not preserve_src_top_level_dirs:
# Only copy file name, not parent dir.
dst_key_name = dst_key_name.partition(src_url.delim)[-1]
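      # e.g. a dst_key_name of "src_subdir/obj" becomes "obj".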
else:
# Case 2.
dst_key_name = exp_src_url.object_name.rpartition(src_url.delim)[-1]
if (exp_dst_url.IsFileUrl() or _ShouldTreatDstUrlAsBucketSubDir(
have_multiple_srcs, exp_dst_url, have_existing_dest_subdir,
src_url_names_container, recursion_requested)):
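    # Prefix dst_key_name with the destination's existing object name, e.g.
    # object name "subdir" plus dst_key_name "obj" yields "subdir/obj".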
if exp_dst_url.object_name and exp_dst_url.object_name.endswith(
exp_dst_url.delim):
dst_key_name = '%s%s%s' % (exp_dst_url.object_name.rstrip(
exp_dst_url.delim), exp_dst_url.delim, dst_key_name)
else:
delim = exp_dst_url.delim if exp_dst_url.object_name else ''
      dst_key_name = '%s%s%s' % (exp_dst_url.object_name or '', delim,
                                 dst_key_name)
new_exp_dst_url = exp_dst_url.Clone()
new_exp_dst_url.object_name = dst_key_name.replace(src_url.delim,
exp_dst_url.delim)
return new_exp_dst_url
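
# Illustrative summary of the naming rules above (a sketch restating the
# cases documented in ConstructDstUrl; object contents elided):
#
#   gsutil cp dir1/f.txt gs://bucket        -> gs://bucket/f.txt
#   gsutil cp -r dir1/dir2 gs://bucket      -> gs://bucket/dir2/...
#   gsutil cp -r dir1/dir2 gs://bucket/sub  -> gs://bucket/sub/dir2/...
#       (when gs://bucket/sub already exists)
#   gsutil cp -r dir1/dir2 gs://bucket/sub  -> gs://bucket/sub/...
#       (when gs://bucket/sub does not exist)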