in gslib/utils/copy_helper.py
def ConstructDstUrl(src_url,
exp_src_url,
src_url_names_container,
have_multiple_srcs,
has_multiple_top_level_srcs,
exp_dst_url,
have_existing_dest_subdir,
recursion_requested,
preserve_posix=False):
"""Constructs the destination URL for a given exp_src_url/exp_dst_url pair.
Uses context-dependent naming rules that mimic Linux cp and mv behavior.
Args:
src_url: Source StorageUrl to be copied.
exp_src_url: Single StorageUrl from wildcard expansion of src_url.
src_url_names_container: True if src_url names a container (including the
case of a wildcard-named bucket subdir (like gs://bucket/abc,
where gs://bucket/abc/* matched some objects).
have_multiple_srcs: True if this is a multi-source request. This can be
true if src_url wildcard-expanded to multiple URLs or if there were
multiple source URLs in the request.
has_multiple_top_level_srcs: Same as have_multiple_srcs but measured
before recursion.
exp_dst_url: the expanded StorageUrl requested for the cp destination.
Final written path is constructed from this plus a context-dependent
variant of src_url.
have_existing_dest_subdir: bool indicator whether dest is an existing
subdirectory.
recursion_requested: True if a recursive operation has been requested.
preserve_posix: True if preservation of posix attributes has been requested.
Returns:
StorageUrl to use for copy.
Raises:
CommandException if destination object name not specified for
source and source is a stream.
"""
if (exp_dst_url.IsFileUrl() and exp_dst_url.IsStream() and preserve_posix):
raise CommandException('Cannot preserve POSIX attributes with a stream.')
if _ShouldTreatDstUrlAsSingleton(src_url_names_container, have_multiple_srcs,
have_existing_dest_subdir, exp_dst_url,
recursion_requested):
# We're copying one file or object to one file or object.
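    # Example: "gsutil cp dir/f.txt gs://bucket/obj" writes exactly
    # gs://bucket/obj.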
return exp_dst_url
if exp_src_url.IsFileUrl() and (exp_src_url.IsStream() or
exp_src_url.IsFifo()):
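    # Streams and named pipes have no final path component to borrow a name
    # from, so the destination must name the object explicitly.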
if have_existing_dest_subdir:
type_text = 'stream' if exp_src_url.IsStream() else 'named pipe'
raise CommandException('Destination object name needed when '
'source is a %s' % type_text)
return exp_dst_url
if not recursion_requested and not have_multiple_srcs:
# We're copying one file or object to a subdirectory. Append final comp
# of exp_src_url to exp_dst_url.
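    # Example: "gsutil cp gs://bucket/d1/obj dir" (where dir exists) writes
    # dir/obj.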
src_final_comp = exp_src_url.object_name.rpartition(src_url.delim)[-1]
    return StorageUrlFromString(
        '%s%s%s' % (exp_dst_url.url_string.rstrip(exp_dst_url.delim),
                    exp_dst_url.delim, src_final_comp))
# Else we're copying multiple sources to a directory, bucket, or a bucket
# "sub-directory".
# Ensure exp_dst_url ends in delim char if we're doing a multi-src copy or
# a copy to a directory. (The check for copying to a directory needs
# special-case handling so that the command:
# gsutil cp gs://bucket/obj dir
# will turn into file://dir/ instead of file://dir -- the latter would cause
# the file "dirobj" to be created.)
# Note: need to check have_multiple_srcs or src_url.names_container()
# because src_url could be a bucket containing a single object, named
# as gs://bucket.
if ((have_multiple_srcs or src_url_names_container or
(exp_dst_url.IsFileUrl() and exp_dst_url.IsDirectory())) and
not exp_dst_url.url_string.endswith(exp_dst_url.delim)):
exp_dst_url = StorageUrlFromString(
'%s%s' % (exp_dst_url.url_string, exp_dst_url.delim))
src_url_is_valid_parent = _IsUrlValidParentDir(src_url)
if not src_url_is_valid_parent and has_multiple_top_level_srcs:
# To avoid top-level name conflicts, we need to copy the parent dir.
# However, that cannot be done because the parent dir has an invalid name.
raise InvalidUrlError(
'Presence of multiple top-level sources and invalid expanded URL'
' make file name conflicts possible for URL: {}'.format(src_url))
# Making naming behavior match how things work with local Linux cp and mv
# operations depends on many factors, including whether the destination is a
# container, and the plurality of the source(s).
# 1. Recursively copying from directories, buckets, or bucket subdirs should
# result in objects/files mirroring the source hierarchy. For example:
# gsutil cp -r dir1/dir2 gs://bucket
  # should create the object gs://bucket/dir2/file2 (assuming dir1/dir2
  # contains file2).
#
# To be consistent with Linux cp behavior, there's one more wrinkle when
# working with subdirs: The resulting object names depend on whether the
# destination subdirectory exists. For example, if gs://bucket/subdir
# exists, the command:
# gsutil cp -r dir1/dir2 gs://bucket/subdir
# should create objects named like gs://bucket/subdir/dir2/a/b/c. In
# contrast, if gs://bucket/subdir does not exist, this same command
# should create objects named like gs://bucket/subdir/a/b/c.
#
# If there are multiple top-level source items, preserve source parent
# dirs. This is similar to when the destination dir already exists and
# avoids conflicts such as "dir1/f.txt" and "dir2/f.txt" both getting
# copied to "gs://bucket/f.txt". Linux normally errors on these conflicts,
# but we cannot do that because we need to give users the ability to create
# dirs as they copy to the cloud.
#
# Note: "mv" is similar to running "cp -r" followed by source deletion.
#
# 2. Copying individual files or objects to dirs, buckets or bucket subdirs
# should result in objects/files named by the final source file name
# component. Example:
# gsutil cp dir1/*.txt gs://bucket
# should create the objects gs://bucket/f1.txt and gs://bucket/f2.txt,
# assuming dir1 contains f1.txt and f2.txt.
# Ignore the "multiple top-level sources" rule if using double wildcard **
# because that treats all files as top-level, in which case the user doesn't
# want to preserve directories.
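  # Example: "gsutil cp -r dir/** gs://bucket" treats every file under dir as
  # top-level, so dir/sub/f.txt lands at gs://bucket/f.txt rather than
  # gs://bucket/sub/f.txt.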
preserve_src_top_level_dirs = ('**' not in src_url.versionless_url_string and
src_url_is_valid_parent and
(has_multiple_top_level_srcs or
have_existing_dest_subdir))
if preserve_src_top_level_dirs or (src_url_names_container and
(exp_dst_url.IsCloudUrl() or
exp_dst_url.IsDirectory())):
# Case 1. Container copy to a destination other than a file.
# Build dst_key_name from subpath of exp_src_url past
# where src_url ends. For example, for src_url=gs://bucket/ and
# exp_src_url=gs://bucket/src_subdir/obj, dst_key_name should be
# src_subdir/obj.
src_url_path_sans_final_dir = GetPathBeforeFinalDir(src_url, exp_src_url)
dst_key_name = exp_src_url.versionless_url_string[
len(src_url_path_sans_final_dir):].lstrip(src_url.delim)
if not preserve_src_top_level_dirs:
# Only copy file name, not parent dir.
dst_key_name = dst_key_name.partition(src_url.delim)[-1]
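      # e.g. a dst_key_name of "src_subdir/obj" becomes "obj".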
else:
# Case 2.
dst_key_name = exp_src_url.object_name.rpartition(src_url.delim)[-1]
if (exp_dst_url.IsFileUrl() or _ShouldTreatDstUrlAsBucketSubDir(
have_multiple_srcs, exp_dst_url, have_existing_dest_subdir,
src_url_names_container, recursion_requested)):
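    # Prefix dst_key_name with the destination's existing object name, e.g.
    # object name "subdir" plus dst_key_name "obj" yields "subdir/obj".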
if exp_dst_url.object_name and exp_dst_url.object_name.endswith(
exp_dst_url.delim):
dst_key_name = '%s%s%s' % (exp_dst_url.object_name.rstrip(
exp_dst_url.delim), exp_dst_url.delim, dst_key_name)
else:
delim = exp_dst_url.delim if exp_dst_url.object_name else ''
      dst_key_name = '%s%s%s' % (exp_dst_url.object_name or '', delim,
                                 dst_key_name)
new_exp_dst_url = exp_dst_url.Clone()
new_exp_dst_url.object_name = dst_key_name.replace(src_url.delim,
exp_dst_url.delim)
return new_exp_dst_url
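
# Illustrative summary of the naming rules above (a sketch restating the
# cases documented in ConstructDstUrl; object contents elided):
#
#   gsutil cp dir1/f.txt gs://bucket        -> gs://bucket/f.txt
#   gsutil cp -r dir1/dir2 gs://bucket      -> gs://bucket/dir2/...
#   gsutil cp -r dir1/dir2 gs://bucket/sub  -> gs://bucket/sub/dir2/...
#       (when gs://bucket/sub already exists)
#   gsutil cp -r dir1/dir2 gs://bucket/sub  -> gs://bucket/sub/...
#       (when gs://bucket/sub does not exist)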