# -*- coding: utf-8 -*-
# Copyright 2011 Google Inc. All Rights Reserved.
# Copyright 2011, Nexenta Systems Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Implementation of Unix-like cp command for cloud storage providers."""
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
from __future__ import unicode_literals
import errno
import itertools
import logging
import os
import time
import traceback
from apitools.base.py import encoding
from gslib import gcs_json_api
from gslib.command import Command
from gslib.command_argument import CommandArgument
from gslib.cs_api_map import ApiSelector
from gslib.exception import CommandException
from gslib.metrics import LogPerformanceSummaryParams
from gslib.name_expansion import CopyObjectsIterator
from gslib.name_expansion import DestinationInfo
from gslib.name_expansion import NameExpansionIterator
from gslib.name_expansion import NameExpansionIteratorDestinationTuple
from gslib.name_expansion import SeekAheadNameExpansionIterator
from gslib.storage_url import ContainsWildcard
from gslib.storage_url import IsCloudSubdirPlaceholder
from gslib.storage_url import StorageUrlFromString
from gslib.third_party.storage_apitools import storage_v1_messages as apitools_messages
from gslib.utils import cat_helper
from gslib.utils import copy_helper
from gslib.utils import parallelism_framework_util
from gslib.utils.cloud_api_helper import GetCloudApiInstance
from gslib.utils.constants import DEBUGLEVEL_DUMP_REQUESTS
from gslib.utils.constants import NO_MAX
from gslib.utils.copy_helper import CreateCopyHelperOpts
from gslib.utils.copy_helper import GetSourceFieldsNeededForCopy
from gslib.utils.copy_helper import GZIP_ALL_FILES
from gslib.utils.copy_helper import ItemExistsError
from gslib.utils.copy_helper import Manifest
from gslib.utils.copy_helper import SkipUnsupportedObjectError
from gslib.utils.posix_util import ConvertModeToBase8
from gslib.utils.posix_util import DeserializeFileAttributesFromObjectMetadata
from gslib.utils.posix_util import InitializePreservePosixData
from gslib.utils.posix_util import POSIXAttributes
from gslib.utils.posix_util import SerializeFileAttributesToObjectMetadata
from gslib.utils.posix_util import ValidateFilePermissionAccess
from gslib.utils.shim_util import GcloudStorageFlag
from gslib.utils.shim_util import GcloudStorageMap
from gslib.utils.system_util import GetStreamFromFileUrl
from gslib.utils.system_util import StdinIterator
from gslib.utils.system_util import StdinIteratorCls
from gslib.utils.text_util import NormalizeStorageClass
from gslib.utils.text_util import RemoveCRLFFromString
from gslib.utils.unit_util import CalculateThroughput
from gslib.utils.unit_util import MakeHumanReadable
_SYNOPSIS = """
gsutil cp [OPTION]... src_url dst_url
gsutil cp [OPTION]... src_url... dst_url
gsutil cp [OPTION]... -I dst_url
"""
_SYNOPSIS_TEXT = """
<B>SYNOPSIS</B>
""" + _SYNOPSIS
_DESCRIPTION_TEXT = """
<B>DESCRIPTION</B>
The ``gsutil cp`` command allows you to copy data between your local file
system and the cloud, within the cloud, and between
cloud storage providers. For example, to upload all text files from the
local directory to a bucket, you can run:
gsutil cp *.txt gs://my-bucket
You can also download data from a bucket. The following command downloads
all text files from the top-level of a bucket to your current directory:
gsutil cp gs://my-bucket/*.txt .
You can use the ``-n`` option to prevent overwriting the content of
existing files. The following example downloads text files from a bucket
without clobbering the data in your directory:
gsutil cp -n gs://my-bucket/*.txt .
Use the ``-r`` option to copy an entire directory tree.
For example, to upload the directory tree ``dir``:
gsutil cp -r dir gs://my-bucket
If you have a large number of files to transfer, you can perform a parallel
multi-threaded/multi-processing copy using the
top-level gsutil ``-m`` option (see "gsutil help options"):
gsutil -m cp -r dir gs://my-bucket
You can use the ``-I`` option with ``stdin`` to specify a list of URLs to
copy, one per line. This allows you to use gsutil
in a pipeline to upload or download objects as generated by a program:
cat filelist | gsutil -m cp -I gs://my-bucket
or:
cat filelist | gsutil -m cp -I ./download_dir
where the output of ``cat filelist`` is a list of files, cloud URLs, and
wildcards of files and cloud URLs.
NOTE: Shells like ``bash`` and ``zsh`` sometimes attempt to expand
wildcards in ways that can be surprising. You may also encounter issues when
attempting to copy files whose names contain wildcard characters. For more
details about these issues, see `Wildcard behavior considerations
<https://cloud.google.com/storage/docs/wildcards#surprising-behavior>`_.
"""
_NAME_CONSTRUCTION_TEXT = """
<B>HOW NAMES ARE CONSTRUCTED</B>
The ``gsutil cp`` command attempts to name objects in ways that are consistent with the
Linux ``cp`` command. This means that names are constructed depending
on whether you're performing a recursive directory copy or copying
individually-named objects, or whether you're copying to an existing or
non-existent directory.
When you perform recursive directory copies, object names are constructed to
mirror the source directory structure starting at the point of recursive
processing. For example, if ``dir1/dir2`` contains the file ``a/b/c``, then the
following command creates the object ``gs://my-bucket/dir2/a/b/c``:
gsutil cp -r dir1/dir2 gs://my-bucket
In contrast, copying individually-named files results in objects named by
the final path component of the source files. For example, assuming again that
``dir1/dir2`` contains ``a/b/c``, the following command creates the object
``gs://my-bucket/c``:
gsutil cp dir1/dir2/** gs://my-bucket
Note that in the above example, the '**' wildcard matches all names
anywhere under ``dir1/dir2``. The wildcard '*' matches names just one level deep. For
more details, see `URI wildcards
<https://cloud.google.com/storage/docs/wildcards#surprising-behavior>`_.
The same rules apply for uploads and downloads: recursive copies of buckets and
bucket subdirectories produce a mirrored filename structure, while copying
individually-named or wildcard-named objects produces flatly-named files.
In addition, the resulting names depend on whether the destination subdirectory
exists. For example, if ``gs://my-bucket/subdir`` exists as a subdirectory,
the following command creates the object ``gs://my-bucket/subdir/dir2/a/b/c``:
gsutil cp -r dir1/dir2 gs://my-bucket/subdir
In contrast, if ``gs://my-bucket/subdir`` does not exist, this same ``gsutil cp``
command creates the object ``gs://my-bucket/subdir/a/b/c``.
NOTE: The
`Google Cloud Platform Console <https://console.cloud.google.com>`_
creates folders by creating "placeholder" objects that end
with a "/" character. gsutil skips these objects when downloading from the
cloud to the local file system, because creating a file that
ends with a "/" is not allowed on Linux and macOS. We
recommend that you only create objects that end with "/" if you don't
intend to download such objects using gsutil.
"""
_SUBDIRECTORIES_TEXT = """
<B>COPYING TO/FROM SUBDIRECTORIES; DISTRIBUTING TRANSFERS ACROSS MACHINES</B>
You can use gsutil to copy to and from subdirectories by using a command
like this:
gsutil cp -r dir gs://my-bucket/data
This causes ``dir`` and all of its files and nested subdirectories to be
copied under the specified destination, resulting in objects with names like
``gs://my-bucket/data/dir/a/b/c``. Similarly, you can download from bucket
subdirectories using the following command:
gsutil cp -r gs://my-bucket/data dir
This causes everything nested under ``gs://my-bucket/data`` to be downloaded
into ``dir``, resulting in files with names like ``dir/data/a/b/c``.
Copying subdirectories is useful if you want to add data to an existing
bucket directory structure over time. It's also useful if you want
to parallelize uploads and downloads across multiple machines (potentially
reducing overall transfer time compared with running ``gsutil -m
cp`` on one machine). For example, if your bucket contains this structure:
gs://my-bucket/data/result_set_01/
gs://my-bucket/data/result_set_02/
...
gs://my-bucket/data/result_set_99/
you can perform concurrent downloads across 3 machines by running these
commands on each machine, respectively:
gsutil -m cp -r gs://my-bucket/data/result_set_[0-3]* dir
gsutil -m cp -r gs://my-bucket/data/result_set_[4-6]* dir
gsutil -m cp -r gs://my-bucket/data/result_set_[7-9]* dir
Note that ``dir`` could be a local directory on each machine, or a
directory mounted off of a shared file server. The performance of the latter
depends on several factors, so we recommend experimenting
to find out what works best for your computing environment.
"""
_COPY_IN_CLOUD_TEXT = """
<B>COPYING IN THE CLOUD AND METADATA PRESERVATION</B>
If both the source and destination URL are cloud URLs from the same
provider, gsutil copies data "in the cloud" (without downloading
to and uploading from the machine where you run gsutil). In addition to
the performance and cost advantages of doing this, copying in the cloud
preserves metadata such as ``Content-Type`` and ``Cache-Control``. In contrast,
when you download data from the cloud, it ends up in a file with
no associated metadata, unless you have some way to keep
or re-create that metadata.
Copies spanning locations and/or storage classes cause data to be rewritten
in the cloud, which may take some time (but is still faster than
downloading and re-uploading). Such operations can be resumed with the same
command if they are interrupted, so long as the command parameters are
identical.
Note that by default, the gsutil ``cp`` command does not copy the object
ACL to the new object, and instead uses the default bucket ACL (see
"gsutil help defacl"). You can override this behavior with the ``-p``
option.
When copying in the cloud, if the destination bucket has Object Versioning
enabled, by default ``gsutil cp`` copies only live versions of the
source object. For example, the following command causes only the single live
version of ``gs://bucket1/obj`` to be copied to ``gs://bucket2``, even if there
are noncurrent versions of ``gs://bucket1/obj``:
gsutil cp gs://bucket1/obj gs://bucket2
To also copy noncurrent versions, use the ``-A`` flag:
gsutil cp -A gs://bucket1/obj gs://bucket2
The top-level gsutil ``-m`` flag is not allowed when using the ``cp -A`` flag.
"""
_CHECKSUM_VALIDATION_TEXT = """
<B>CHECKSUM VALIDATION</B>
gsutil automatically performs checksum validation for copies to and from Cloud
Storage. For more information, see `Hashes and ETags
<https://cloud.google.com/storage/docs/hashes-etags#cli>`_.
"""
_RETRY_HANDLING_TEXT = """
<B>RETRY HANDLING</B>
The ``cp`` command retries when failures occur, but if enough failures happen
during a particular copy or delete operation, or if a failure isn't retryable,
the ``cp`` command skips that object and moves on. If any failures were not
successfully retried by the end of the copy run, the ``cp`` command reports the
number of failures and exits with a non-zero status.
For details about gsutil's overall retry handling, see `Retry strategy
<https://cloud.google.com/storage/docs/retry-strategy#tools>`_.
"""
_RESUMABLE_TRANSFERS_TEXT = """
<B>RESUMABLE TRANSFERS</B>
gsutil automatically resumes interrupted downloads and interrupted `resumable
uploads <https://cloud.google.com/storage/docs/resumable-uploads#gsutil>`_,
except when performing streaming transfers. In the case of an interrupted
download, a partially downloaded temporary file is visible in the destination
directory with the suffix ``_.gstmp`` in its name. Upon completion, the
temporary file is renamed to the final destination file name.
Resumable transfers store state information in files under
~/.gsutil, named by the destination object or file.
See "gsutil help prod" for details on using resumable transfers
in production.
"""
_STREAMING_TRANSFERS_TEXT = """
<B>STREAMING TRANSFERS</B>
Use '-' in place of src_url or dst_url to perform a `streaming transfer
<https://cloud.google.com/storage/docs/streaming>`_.
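For example, the following commands stream data generated by a program to
an object, and stream an object's contents to a program (``collect_data``
and ``process_data`` stand for any programs that write to stdout and read
from stdin, respectively):
collect_data | gsutil cp - gs://my-bucket/data
gsutil cp gs://my-bucket/data - | process_data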
Streaming uploads using the `JSON API
<https://cloud.google.com/storage/docs/request-endpoints#gsutil>`_ buffer a
portion of the transferred data in memory, so gsutil can back up part-way
into the stream and thus sometimes resume an upload in the event of network
or service problems.
gsutil does not support resuming streaming uploads using the XML API or
resuming streaming downloads for either JSON or XML. If you have a large amount
of data to transfer in these cases, we recommend that you write the data to a
local file and copy that file rather than streaming it.
"""
_SLICED_OBJECT_DOWNLOADS_TEXT = """
<B>SLICED OBJECT DOWNLOADS</B>
gsutil can automatically use ranged ``GET`` requests to perform downloads in
parallel for large files being downloaded from Cloud Storage. See `sliced object
download documentation
<https://cloud.google.com/storage/docs/sliced-object-downloads>`_
for a complete discussion.
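For example, the following invocation lowers the size threshold above which
gsutil attempts sliced downloads, using the
``sliced_object_download_threshold`` boto config option:
gsutil -o "GSUtil:sliced_object_download_threshold=100M" \\
cp gs://my-bucket/large-file /local/dir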
"""
_PARALLEL_COMPOSITE_UPLOADS_TEXT = """
<B>PARALLEL COMPOSITE UPLOADS</B>
gsutil can automatically use
`object composition <https://cloud.google.com/storage/docs/composite-objects>`_
to perform uploads in parallel for large, local files being uploaded to
Cloud Storage. See the `parallel composite uploads documentation
<https://cloud.google.com/storage/docs/parallel-composite-uploads>`_ for a
complete discussion.
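For example, the following invocation sets the size threshold above which
gsutil attempts parallel composite uploads, using the
``parallel_composite_upload_threshold`` boto config option:
gsutil -o "GSUtil:parallel_composite_upload_threshold=150M" \\
cp /local/dir/large-file gs://my-bucket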
"""
_CHANGING_TEMP_DIRECTORIES_TEXT = """
<B>CHANGING TEMP DIRECTORIES</B>
gsutil writes data to a temporary directory in several cases:
- when compressing data to be uploaded (see the ``-z`` and ``-Z`` options)
- when decompressing data being downloaded (for example, when the data has
``Content-Encoding:gzip`` as a result of being uploaded
using gsutil cp -z or gsutil cp -Z)
- when running integration tests using the gsutil test command
In these cases, it's possible the temporary file location on your system that
gsutil selects by default may not have enough space. If gsutil runs out of
space during one of these operations (for example, raising
"CommandException: Inadequate temp space available to compress <your file>"
during a ``gsutil cp -z`` operation), you can change where it writes these
temp files by setting the TMPDIR environment variable. On Linux and macOS,
you can set the variable as follows:
TMPDIR=/some/directory gsutil cp ...
You can also add this line to your ~/.bashrc file and restart the shell
before running gsutil:
export TMPDIR=/some/directory
On Windows 7, you can change the TMPDIR environment variable from Start ->
Computer -> System -> Advanced System Settings -> Environment Variables.
You need to reboot after making this change for it to take effect. Rebooting
is not necessary after running the export command on Linux and macOS.
"""
_COPYING_SPECIAL_FILES_TEXT = """
<B>SYNCHRONIZING OVER OS-SPECIFIC FILE TYPES (SUCH AS SYMLINKS AND DEVICES)</B>
Please see the section about OS-specific file types in "gsutil help rsync".
While that section refers to the ``rsync`` command, analogous
points apply to the ``cp`` command.
"""
_OPTIONS_TEXT = """
<B>OPTIONS</B>
-a predef_acl Applies the specified predefined ACL to uploaded objects. See
"gsutil help acls" for further details.
-A Copy all source versions from a source bucket or folder.
If not set, only the live version of each source object is
copied.
NOTE: This option is only useful when the destination
bucket has Object Versioning enabled. Additionally, the generation
numbers of copied versions do not necessarily match the order of the
original generation numbers.
-c If an error occurs, continue attempting to copy the remaining
files. If any copies are unsuccessful, gsutil's exit status
is non-zero, even if this flag is set. This option is
implicitly set when running ``gsutil -m cp...``.
NOTE: ``-c`` only applies to the actual copying operation. If an
error, such as ``invalid Unicode file name``, occurs while iterating
over the files in the local directory, gsutil prints an error
message and aborts.
-D Copy in "daisy chain" mode, which means copying between two buckets
by first downloading to the machine where gsutil is run, then
uploading to the destination bucket. The default mode is a
"copy in the cloud," where data is copied between two buckets without
uploading or downloading.
During a "copy in the cloud," a source composite object remains composite
at its destination. However, you can use "daisy chain" mode to change a
composite object into a non-composite object. For example:
gsutil cp -D gs://bucket/obj gs://bucket/obj_tmp
gsutil mv gs://bucket/obj_tmp gs://bucket/obj
NOTE: "Daisy chain" mode is automatically used when copying
between providers: for example, when copying data from Cloud Storage
to another provider.
-e Exclude symlinks. When specified, symbolic links are not copied.
-I Use ``stdin`` to specify a list of files or objects to copy. You can use
gsutil in a pipeline to upload or download objects as generated by a program.
For example:
cat filelist | gsutil -m cp -I gs://my-bucket
where the output of ``cat filelist`` is a one-per-line list of
files, cloud URLs, and wildcards of files and cloud URLs.
-j <ext,...> Applies gzip transport encoding to any file upload whose
extension matches the ``-j`` extension list. This is useful when
uploading files with compressible content such as .js, .css,
or .html files. This also saves network bandwidth while
leaving the data uncompressed in Cloud Storage.
When you specify the ``-j`` option, files being uploaded are
compressed in-memory and on-the-wire only. Both the local
files and Cloud Storage objects remain uncompressed. The
uploaded objects retain the ``Content-Type`` and name of the
original files.
Note that if you want to use the ``-m`` `top-level option
<https://cloud.google.com/storage/docs/gsutil/addlhelp/GlobalCommandLineOptions>`_
to parallelize copies along with the ``-j/-J`` options, your
performance may be bottlenecked by the
"max_upload_compression_buffer_size" boto config option,
which is set to 2 GiB by default. You can change this
compression buffer size to a higher limit. For example:
gsutil -o "GSUtil:max_upload_compression_buffer_size=8G" \\
-m cp -j html,txt -r /local/source/dir gs://bucket/path
-J Applies gzip transport encoding to file uploads. This option
works like the ``-j`` option described above, but it applies to
all uploaded files, regardless of extension.
CAUTION: If some of the source files don't compress well, such
as binary data, using this option may result in longer uploads.
-L <file> Outputs a manifest log file with detailed information about
each item that was copied. This manifest contains the following
information for each item:
- Source path.
- Destination path.
- Source size.
- Bytes transferred.
- MD5 hash.
- Transfer start time and date in UTC and ISO 8601 format.
- Transfer completion time and date in UTC and ISO 8601 format.
- Upload id, if a resumable upload was performed.
- Final result of the attempted transfer, either success or failure.
- Failure details, if any.
If the log file already exists, gsutil uses the file as an
input to the copy process, and appends log items to
the existing file. Objects that are marked in the
existing log file as having been successfully copied or
skipped are ignored. Objects without entries are
copied and ones previously marked as unsuccessful are
retried. This option can be used in conjunction with the ``-c`` option to
build a script that copies a large number of objects reliably,
using a bash script like the following:
until gsutil cp -c -L cp.log -r ./dir gs://bucket; do
sleep 1
done
The -c option enables copying to continue after failures
occur, and the -L option allows gsutil to pick up where it
left off without duplicating work. The loop continues
running as long as gsutil exits with a non-zero status. A non-zero
status indicates there was at least one failure during the copy
operation.
NOTE: If you are synchronizing the contents of a
directory and a bucket, or the contents of two buckets, see
"gsutil help rsync".
-n No-clobber. When specified, existing files or objects at the
destination are not replaced. Any items that are skipped
by this option are reported as skipped. gsutil
performs an additional GET request to check if an item
exists before attempting to upload the data. This saves gsutil
from retransmitting data, but the additional HTTP requests may make
small object transfers slower and more expensive.
-p Preserves ACLs when copying in the cloud. Note
that this option has performance and cost implications only when
using the XML API, as the XML API requires separate HTTP calls for
interacting with ACLs. You can mitigate this
performance issue using ``gsutil -m cp`` to perform parallel
copying. Note that this option only works if you have OWNER access
to all objects that are copied. If you want all objects in the
destination bucket to end up with the same ACL, you can avoid these
performance issues by setting a default object ACL on that bucket
instead of using ``cp -p``. See "gsutil help defacl".
Note that it's not valid to specify both the ``-a`` and ``-p`` options
together.
-P Enables POSIX attributes to be preserved when objects are
copied. ``gsutil cp`` copies fields provided by ``stat``. These fields
are the user ID of the owner, the group
ID of the owning group, the mode or permissions of the file, and
the access and modification time of the file. For downloads, these
attributes are only set if the source objects were uploaded
with this flag enabled.
On Windows, this flag only sets and restores access time and
modification time. This is because Windows doesn't support
POSIX uid/gid/mode.
-R, -r The ``-R`` and ``-r`` options are synonymous. They enable directories,
buckets, and bucket subdirectories to be copied recursively.
If you don't use this option for an upload, gsutil copies the files
it finds and skips any directories. Similarly, if you don't
specify this option for a download, gsutil copies
objects at the current bucket directory level and skips subdirectories.
-s <class> Specifies the storage class of the destination object. If not
specified, the default storage class of the destination bucket
is used. This option is not valid for copying to non-cloud destinations.
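For example, to copy a file into a bucket while storing it in the
``nearline`` storage class:
gsutil cp -s nearline data.csv gs://my-bucket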
-U Skips objects with unsupported object types instead of failing.
Unsupported object types include Amazon S3 objects in the GLACIER
storage class.
-v Prints the version-specific URL for each uploaded object. You can
use these URLs to safely make concurrent upload requests, because
Cloud Storage refuses to perform an update if the current
object version doesn't match the version-specific URL. See
`generation numbers
<https://cloud.google.com/storage/docs/metadata#generation-number>`_
for more details.
-z <ext,...> Applies gzip content-encoding to any file upload whose
extension matches the ``-z`` extension list. This is useful when
uploading files with compressible content such as .js, .css,
or .html files, because it reduces network bandwidth and storage
sizes. This can both improve performance and reduce costs.
When you specify the ``-z`` option, the data from your files is
compressed before it is uploaded, but your actual files are
left uncompressed on the local disk. The uploaded objects
retain the ``Content-Type`` and name of the original files, but
have their ``Content-Encoding`` metadata set to ``gzip`` (to
indicate that the stored object data is compressed on the
Cloud Storage servers) and their ``Cache-Control`` metadata
set to ``no-transform``.
For example, the following command:
gsutil cp -z html \\
cattypes.html tabby.jpeg gs://mycats
does the following:
- The ``cp`` command uploads the files ``cattypes.html`` and
``tabby.jpeg`` to the bucket ``gs://mycats``.
- Based on the file extensions, gsutil sets the ``Content-Type``
of ``cattypes.html`` to ``text/html`` and ``tabby.jpeg`` to
``image/jpeg``.
- The ``-z`` option compresses the data in the file ``cattypes.html``.
- The ``-z`` option also sets the ``Content-Encoding`` for
``cattypes.html`` to ``gzip`` and the ``Cache-Control`` for
``cattypes.html`` to ``no-transform``.
Because the ``-z/-Z`` options compress data prior to upload, they
are not subject to the same compression buffer bottleneck that
can affect the ``-j/-J`` options.
Note that if you download an object with ``Content-Encoding:gzip``,
gsutil decompresses the content before writing the local file.
-Z Applies gzip content-encoding to file uploads. This option
works like the ``-z`` option described above, but it applies to
all uploaded files, regardless of extension.
CAUTION: If some of the source files don't compress well, such
as binary data, using this option may result in files taking up
more space in the cloud than they would if left uncompressed.
--stet If the STET binary can be found via the boto config file or in
PATH, cp uses the split-trust encryption tool for end-to-end encryption.
"""
_DETAILED_HELP_TEXT = '\n\n'.join([
_SYNOPSIS_TEXT,
_DESCRIPTION_TEXT,
_NAME_CONSTRUCTION_TEXT,
_SUBDIRECTORIES_TEXT,
_COPY_IN_CLOUD_TEXT,
_CHECKSUM_VALIDATION_TEXT,
_RETRY_HANDLING_TEXT,
_RESUMABLE_TRANSFERS_TEXT,
_STREAMING_TRANSFERS_TEXT,
_SLICED_OBJECT_DOWNLOADS_TEXT,
_PARALLEL_COMPOSITE_UPLOADS_TEXT,
_CHANGING_TEMP_DIRECTORIES_TEXT,
_COPYING_SPECIAL_FILES_TEXT,
_OPTIONS_TEXT,
])
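# getopt-style short-option spec; a letter followed by ':' (e.g. 'a:', 'j:',
# 'L:', 's:', 'z:') indicates that the flag takes a value.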
CP_SUB_ARGS = 'a:AcDeIL:MNnpPrRs:tUvz:Zj:J'
# May be used by cp or mv.
CP_AND_MV_SHIM_FLAG_MAP = {
'-A': GcloudStorageFlag('--all-versions'),
'-a': GcloudStorageFlag('--predefined-acl'),
'-c': GcloudStorageFlag('--continue-on-error'),
'-D': GcloudStorageFlag('--daisy-chain'),
'-e': GcloudStorageFlag('--ignore-symlinks'),
'-I': GcloudStorageFlag('--read-paths-from-stdin'),
'-J': GcloudStorageFlag('--gzip-in-flight-all'),
'-j': GcloudStorageFlag('--gzip-in-flight'),
'-L': GcloudStorageFlag('--manifest-path'),
'-n': GcloudStorageFlag('--no-clobber'),
'-P': GcloudStorageFlag('--preserve-posix'),
'-p': GcloudStorageFlag('--preserve-acl'),
'-s': GcloudStorageFlag('--storage-class'),
'-v': GcloudStorageFlag('--print-created-message'),
'-Z': GcloudStorageFlag('--gzip-local-all'),
'-z': GcloudStorageFlag('--gzip-local'),
'-U': GcloudStorageFlag('--skip-unsupported'),
}
# Adds recursion flags.
CP_SHIM_FLAG_MAP = {
k: v for k, v in list(CP_AND_MV_SHIM_FLAG_MAP.items()) +
[('-r', GcloudStorageFlag('-r')), ('-R', GcloudStorageFlag('-r'))]
}
def ShimTranslatePredefinedAclSubOptForCopy(sub_opts):
"""Gcloud uses camel-case predefined/canned ACLs, and gsutil uses snake-case.
The camel-case-snake-case difference is related to gcloud primarily using
JSON API rather than the XML API.
Predefined ACLs are also called "canned ACLs".
Args:
sub_opts: List of pairs representing flag keys and values, e.g.
[('a', 'public-read')]
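which is translated in place to [('a', 'publicRead')].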
"""
predefined_acl_idx = None
for i, (k, _) in enumerate(sub_opts):
if k == '-a':
predefined_acl_idx = i
break
if predefined_acl_idx is not None:
flag, old_predefined_acl = sub_opts[predefined_acl_idx]
sub_opts[predefined_acl_idx] = (
flag,
gcs_json_api.FULL_PREDEFINED_ACL_XML_TO_JSON_TRANSLATION.get(
old_predefined_acl, old_predefined_acl))
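# Module-level wrapper passed to Command.Apply so the copy callable can be
# dispatched to worker processes/threads in -m mode; it forwards the
# command's preserve_posix setting to each CopyFunc call.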
def _CopyFuncWrapper(cls, args, thread_state=None):
cls.CopyFunc(args,
thread_state=thread_state,
preserve_posix=cls.preserve_posix_attrs)
def _CopyExceptionHandler(cls, e):
"""Simple exception handler to allow post-completion status."""
cls.logger.error(str(e))
cls.op_failure_count += 1
cls.logger.debug('\n\nEncountered exception while copying:\n%s\n',
traceback.format_exc())
def _RmExceptionHandler(cls, e):
"""Simple exception handler to allow post-completion status."""
cls.logger.error(str(e))
class CpCommand(Command):
"""Implementation of gsutil cp command.
Note that CpCommand is run for both gsutil cp and gsutil mv. The latter
happens by MvCommand calling CpCommand and passing the hidden (undocumented)
-M option. This allows the copy and remove needed for each mv to run
together (rather than first running all the cp's and then all the rm's, as
we originally had implemented), which in turn avoids the following problem
with removing the wrong objects: starting with a bucket containing only
the object gs://bucket/obj, say the user does:
gsutil mv gs://bucket/* gs://bucket/d.txt
If we ran all the cp's and then all the rm's and we didn't expand the wildcard
first, the cp command would first copy gs://bucket/obj to gs://bucket/d.txt,
and the rm command would then remove that object. In the implementation
prior to gsutil release 3.12 we avoided this by building a list of objects
to process and then running the copies and then the removes; but building
the list up front limits scalability (compared with the current approach
of processing the bucket listing iterator on the fly).
"""
# Command specification. See base class for documentation.
command_spec = Command.CreateCommandSpec(
'cp',
command_name_aliases=['copy'],
usage_synopsis=_SYNOPSIS,
min_args=1,
max_args=NO_MAX,
# -t is deprecated but leave intact for now to avoid breakage.
supported_sub_args=CP_SUB_ARGS,
file_url_ok=True,
provider_url_ok=False,
urls_start_arg=0,
gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
gs_default_api=ApiSelector.JSON,
# Unfortunately, "private" args are the only way to support non-single
# character flags.
supported_private_args=['stet', 'testcallbackfile='],
argparse_arguments=[
CommandArgument.MakeZeroOrMoreCloudOrFileURLsArgument(),
],
)
# Help specification. See help_provider.py for documentation.
help_spec = Command.HelpSpec(
help_name='cp',
help_name_aliases=['copy'],
help_type='command_help',
help_one_line_summary='Copy files and objects',
help_text=_DETAILED_HELP_TEXT,
subcommand_help_text={},
)
def get_gcloud_storage_args(self):
self.logger.warn(
"Unlike pure gsutil, this shim won't run composite uploads and sliced"
' downloads in parallel by default. Use the -m flag to enable'
' parallelism (i.e. "gsutil -m cp ...").')
ShimTranslatePredefinedAclSubOptForCopy(self.sub_opts)
gcloud_storage_map = GcloudStorageMap(
gcloud_command=['storage', 'cp'],
flag_map=CP_SHIM_FLAG_MAP,
)
return super().get_gcloud_storage_args(gcloud_storage_map)
# pylint: disable=too-many-statements
def CopyFunc(self, copy_object_info, thread_state=None, preserve_posix=False):
"""Worker function for performing the actual copy (and rm, for mv)."""
gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)
copy_helper_opts = copy_helper.GetCopyHelperOpts()
if copy_helper_opts.perform_mv:
cmd_name = 'mv'
else:
cmd_name = self.command_name
src_url = copy_object_info.source_storage_url
exp_src_url = copy_object_info.expanded_storage_url
src_url_names_container = copy_object_info.names_container
have_multiple_srcs = copy_object_info.is_multi_source_request
if src_url.IsCloudUrl() and src_url.IsProvider():
raise CommandException(
'The %s command does not allow provider-only source URLs (%s)' %
(cmd_name, src_url))
if preserve_posix and src_url.IsFileUrl() and src_url.IsStream():
raise CommandException('Cannot preserve POSIX attributes with a stream.')
if self.parallel_operations and src_url.IsFileUrl() and src_url.IsStream():
raise CommandException(
'Cannot upload from a stream when using gsutil -m option.')
if have_multiple_srcs:
copy_helper.InsistDstUrlNamesContainer(
copy_object_info.exp_dst_url,
copy_object_info.have_existing_dst_container, cmd_name)
# Various GUI tools (like the GCS web console) create placeholder objects
# ending with '/' when the user creates an empty directory. Normally these
# tools should delete those placeholders once objects have been written
# "under" the directory, but sometimes the placeholders are left around. We
# need to filter them out here, otherwise if the user tries to rsync from
# GCS to a local directory it will result in a directory/file conflict
# (e.g., trying to download an object called "mydata/" where the local
# directory "mydata" exists).
if IsCloudSubdirPlaceholder(exp_src_url):
# We used to output the message 'Skipping cloud sub-directory placeholder
# object...' but we no longer do so because it caused customer confusion.
return
if copy_helper_opts.use_manifest and self.manifest.WasSuccessful(
exp_src_url.url_string):
return
if copy_helper_opts.perform_mv and copy_object_info.names_container:
# Use recursion_requested when performing name expansion for the
# directory mv case so we can determine if any of the source URLs are
# directories (and then use cp -r and rm -r to perform the move, to
# match the behavior of Linux mv (which when moving a directory moves
# all the contained files).
self.recursion_requested = True
if (copy_object_info.exp_dst_url.IsFileUrl() and
not os.path.exists(copy_object_info.exp_dst_url.object_name) and
have_multiple_srcs):
try:
os.makedirs(copy_object_info.exp_dst_url.object_name)
except OSError as e:
if e.errno != errno.EEXIST:
raise
dst_url = copy_helper.ConstructDstUrl(
src_url,
exp_src_url,
src_url_names_container,
have_multiple_srcs,
copy_object_info.is_multi_top_level_source_request,
copy_object_info.exp_dst_url,
copy_object_info.have_existing_dst_container,
self.recursion_requested,
preserve_posix=preserve_posix)
dst_url = copy_helper.FixWindowsNaming(src_url, dst_url)
copy_helper.CheckForDirFileConflict(exp_src_url, dst_url)
if copy_helper.SrcDstSame(exp_src_url, dst_url):
raise CommandException('%s: "%s" and "%s" are the same file - '
'abort.' % (cmd_name, exp_src_url, dst_url))
if dst_url.IsCloudUrl() and dst_url.HasGeneration():
raise CommandException('%s: a version-specific URL\n(%s)\ncannot be '
'the destination for gsutil cp - abort.' %
(cmd_name, dst_url))
if not dst_url.IsCloudUrl() and copy_helper_opts.dest_storage_class:
raise CommandException('Cannot specify storage class for a non-cloud '
'destination: %s' % dst_url)
src_obj_metadata = None
if copy_object_info.expanded_result:
src_obj_metadata = encoding.JsonToMessage(
apitools_messages.Object, copy_object_info.expanded_result)
if src_url.IsFileUrl() and preserve_posix:
if not src_obj_metadata:
src_obj_metadata = apitools_messages.Object()
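# os.stat() returns a 10-tuple: (st_mode, st_ino, st_dev, st_nlink, st_uid,
# st_gid, st_size, st_atime, st_mtime, st_ctime); only the POSIX attributes
# that gsutil preserves are kept here.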
mode, _, _, _, uid, gid, _, atime, mtime, _ = os.stat(
exp_src_url.object_name)
mode = ConvertModeToBase8(mode)
posix_attrs = POSIXAttributes(atime=atime,
mtime=mtime,
uid=uid,
gid=gid,
mode=mode)
custom_metadata = apitools_messages.Object.MetadataValue(
additionalProperties=[])
SerializeFileAttributesToObjectMetadata(posix_attrs,
custom_metadata,
preserve_posix=preserve_posix)
src_obj_metadata.metadata = custom_metadata
if src_obj_metadata and dst_url.IsFileUrl():
posix_attrs = DeserializeFileAttributesFromObjectMetadata(
src_obj_metadata, src_url.url_string)
mode = posix_attrs.mode.permissions
valid, err = ValidateFilePermissionAccess(src_url.url_string,
uid=posix_attrs.uid,
gid=posix_attrs.gid,
mode=mode)
if preserve_posix and not valid:
logging.getLogger().critical(err)
raise CommandException('This sync will orphan file(s), please fix their'
' permissions before trying again.')
bytes_transferred = 0
try:
if copy_helper_opts.use_manifest:
self.manifest.Initialize(exp_src_url.url_string, dst_url.url_string)
if (self.recursion_requested and
copy_object_info.exp_dst_url.object_name and dst_url.IsFileUrl()):
# exp_dst_url is the wildcard-expanded path passed by the user:
# exp_dst_url => ~/dir
# container => /usr/name/dir
container = os.path.abspath(copy_object_info.exp_dst_url.object_name)
# dst_url holds the complete path of the object's destination:
# dst_url => /usr/name/dir/../file.txt
# abspath => /usr/name/file.txt
#
# Taking the common path of this and container yields: /usr/name,
# which does not start with container when the inclusion of '..' strings
# results in a copy outside of the container.
if not os.path.commonpath([
container, os.path.abspath(dst_url.object_name)
]).startswith(container):
self.logger.warn(
'Skipping copy of source URL %s because it would be copied '
'outside the expected destination directory: %s.' %
(exp_src_url, container))
if copy_helper_opts.use_manifest:
self.manifest.SetResult(
exp_src_url.url_string, 0, 'skip',
'Would have copied outside the destination directory.')
return
_, bytes_transferred, result_url, md5 = copy_helper.PerformCopy(
self.logger,
exp_src_url,
dst_url,
gsutil_api,
self,
_CopyExceptionHandler,
src_obj_metadata=src_obj_metadata,
allow_splitting=True,
headers=self.headers,
manifest=self.manifest,
gzip_encoded=self.gzip_encoded,
gzip_exts=self.gzip_exts,
preserve_posix=preserve_posix,
use_stet=self.use_stet)
if copy_helper_opts.use_manifest:
if md5:
self.manifest.Set(exp_src_url.url_string, 'md5', md5)
self.manifest.SetResult(exp_src_url.url_string, bytes_transferred, 'OK')
if copy_helper_opts.print_ver:
# Some cases don't return a version-specific URL (e.g., if destination
# is a file).
self.logger.info('Created: %s', result_url)
except ItemExistsError:
message = 'Skipping existing item: %s' % dst_url
self.logger.info(message)
if copy_helper_opts.use_manifest:
self.manifest.SetResult(exp_src_url.url_string, 0, 'skip', message)
except SkipUnsupportedObjectError as e:
message = ('Skipping item %s with unsupported object type %s' %
(exp_src_url.url_string, e.unsupported_type))
self.logger.info(message)
if copy_helper_opts.use_manifest:
self.manifest.SetResult(exp_src_url.url_string, 0, 'skip', message)
except copy_helper.FileConcurrencySkipError as e:
self.logger.warn(
'Skipping copy of source URL %s because destination URL '
'%s is already being copied by another gsutil process '
'or thread (did you specify the same source URL twice?) ' %
(src_url, dst_url))
except Exception as e: # pylint: disable=broad-except
if (copy_helper_opts.no_clobber and
copy_helper.IsNoClobberServerException(e)):
message = 'Rejected (noclobber): %s' % dst_url
self.logger.info(message)
if copy_helper_opts.use_manifest:
self.manifest.SetResult(exp_src_url.url_string, 0, 'skip', message)
elif self.continue_on_error:
message = 'Error copying %s: %s' % (src_url, str(e))
self.op_failure_count += 1
self.logger.error(message)
if copy_helper_opts.use_manifest:
self.manifest.SetResult(exp_src_url.url_string, 0, 'error',
RemoveCRLFFromString(message))
else:
if copy_helper_opts.use_manifest:
self.manifest.SetResult(exp_src_url.url_string, 0, 'error', str(e))
raise
else:
if copy_helper_opts.perform_mv:
self.logger.info('Removing %s...', exp_src_url)
if exp_src_url.IsCloudUrl():
gsutil_api.DeleteObject(exp_src_url.bucket_name,
exp_src_url.object_name,
generation=exp_src_url.generation,
provider=exp_src_url.scheme)
else:
os.unlink(exp_src_url.object_name)
with self.stats_lock:
# TODO: Remove stats_lock; we should be able to calculate bytes
# transferred from StatusMessages posted by operations within PerformCopy.
self.total_bytes_transferred += bytes_transferred
def _ConstructNameExpansionIteratorDstTupleIterator(self, src_url_strs_iter,
dst_url_strs):
copy_helper_opts = copy_helper.GetCopyHelperOpts()
for src_url_str, dst_url_str in zip(src_url_strs_iter, dst_url_strs):
# Getting the destination information for each (sources, destination)
# tuple. This assumes that the same destination is never provided in
# multiple tuples, and doing so may result in an inconsistent behavior
# especially when using the -m multi-threading option.
#
# Example for the inconsistent behavior, the following commands will
# behave differently:
#
# gsutil cp -r dir1 dir2 gs://bucket/non-existent-dir
# gsutil cp -r [
# (dir1, gs://bucket/non-existent-dir),
# (dir2, gs://bucket/non-existent-dir)
# ]
#
# When multiple threads execute against a non-existent destination
# directory, they might observe different states of that directory. The
# first thread to run finds that the destination directory does not
# exist, creates it, and copies the files inside the source directories
# into it. Later threads find that the destination directory already
# exists and therefore copy the source directories themselves into the
# destination directory. In another scenario, all threads might find
# that the destination directory does not exist and copy only the
# contents of the source directories into it.
exp_dst_url, have_existing_dst_container = (
copy_helper.ExpandUrlToSingleBlr(dst_url_str,
self.gsutil_api,
self.project_id,
logger=self.logger))
name_expansion_iterator_dst_tuple = NameExpansionIteratorDestinationTuple(
NameExpansionIterator(
self.command_name,
self.debug,
self.logger,
self.gsutil_api,
src_url_str,
self.recursion_requested or copy_helper_opts.perform_mv,
project_id=self.project_id,
all_versions=self.all_versions,
ignore_symlinks=self.exclude_symlinks,
continue_on_error=(self.continue_on_error or
self.parallel_operations),
bucket_listing_fields=GetSourceFieldsNeededForCopy(
exp_dst_url.IsCloudUrl(),
copy_helper_opts.skip_unsupported_objects,
copy_helper_opts.preserve_acl,
preserve_posix=self.preserve_posix_attrs,
delete_source=copy_helper_opts.perform_mv,
file_size_will_change=self.use_stet)),
DestinationInfo(exp_dst_url, have_existing_dst_container))
self.has_file_dst = self.has_file_dst or exp_dst_url.IsFileUrl()
self.has_cloud_dst = self.has_cloud_dst or exp_dst_url.IsCloudUrl()
self.provider_types.add(exp_dst_url.scheme)
self.combined_src_urls = itertools.chain(self.combined_src_urls,
src_url_str)
yield name_expansion_iterator_dst_tuple
# Command entry point.
def RunCommand(self):
copy_helper_opts = self._ParseOpts()
self.total_bytes_transferred = 0
dst_url = StorageUrlFromString(self.args[-1])
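# A destination of '-' (stdout) or a named pipe means this is a streaming
# download; delegate to the cat helper instead of the normal copy machinery.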
if dst_url.IsFileUrl() and (dst_url.object_name == '-' or dst_url.IsFifo()):
if self.preserve_posix_attrs:
raise CommandException('Cannot preserve POSIX attributes with a '
'stream or a named pipe.')
cat_out_fd = (GetStreamFromFileUrl(dst_url, mode='wb')
if dst_url.IsFifo() else None)
return cat_helper.CatHelper(self).CatUrlStrings(self.args[:-1],
cat_out_fd=cat_out_fd)
if copy_helper_opts.read_args_from_stdin:
if len(self.args) != 1:
raise CommandException('Source URLs cannot be specified with -I option')
# Use StdinIteratorCls instead of StdinIterator here to avoid Python 3
# generator pickling errors when multiprocessing a command.
src_url_strs = [StdinIteratorCls()]
else:
if len(self.args) < 2:
raise CommandException('Wrong number of arguments for "cp" command.')
src_url_strs = [self.args[:-1]]
dst_url_strs = [self.args[-1]]
self.combined_src_urls = []
self.has_file_dst = False
self.has_cloud_dst = False
self.provider_types = set()
# Because cp may have multiple source URLs and multiple destinations, we
# wrap the name expansion iterator in order to collect analytics.
name_expansion_iterator = CopyObjectsIterator(
self._ConstructNameExpansionIteratorDstTupleIterator(
src_url_strs, dst_url_strs),
copy_helper_opts.daisy_chain,
)
process_count, thread_count = self._GetProcessAndThreadCount(
process_count=None,
thread_count=None,
parallel_operations_override=None,
print_macos_warning=False)
copy_helper.TriggerReauthForDestinationProviderIfNecessary(
dst_url, self.gsutil_api, process_count * thread_count)
seek_ahead_iterator = None
# Cannot seek ahead with stdin args, since we can only iterate them
# once without buffering in memory.
if not copy_helper_opts.read_args_from_stdin:
seek_ahead_iterator = SeekAheadNameExpansionIterator(
self.command_name,
self.debug,
self.GetSeekAheadGsutilApi(),
self.combined_src_urls,
self.recursion_requested or copy_helper_opts.perform_mv,
all_versions=self.all_versions,
project_id=self.project_id,
ignore_symlinks=self.exclude_symlinks,
file_size_will_change=self.use_stet)
# Use a lock to ensure accurate statistics in the face of
# multi-threading/multi-processing.
self.stats_lock = parallelism_framework_util.CreateLock()
# Tracks if any copies failed.
self.op_failure_count = 0
# Start the clock.
start_time = time.time()
# Tuple of attributes to share/manage across multiple processes in
# parallel (-m) mode.
shared_attrs = ('op_failure_count', 'total_bytes_transferred')
# Perform copy requests in parallel (-m) mode, if requested, using
# configured number of parallel processes and threads. Otherwise,
# perform requests with sequential function calls in current process.
self.Apply(_CopyFuncWrapper,
name_expansion_iterator,
_CopyExceptionHandler,
shared_attrs,
fail_on_error=(not self.continue_on_error),
seek_ahead_iterator=seek_ahead_iterator)
self.logger.debug('total_bytes_transferred: %d',
self.total_bytes_transferred)
end_time = time.time()
self.total_elapsed_time = end_time - start_time
self.total_bytes_per_second = CalculateThroughput(
self.total_bytes_transferred, self.total_elapsed_time)
LogPerformanceSummaryParams(
has_file_dst=self.has_file_dst,
has_cloud_dst=self.has_cloud_dst,
avg_throughput=self.total_bytes_per_second,
total_bytes_transferred=self.total_bytes_transferred,
total_elapsed_time=self.total_elapsed_time,
uses_fan=self.parallel_operations,
is_daisy_chain=copy_helper_opts.daisy_chain,
provider_types=list(self.provider_types))
if self.debug >= DEBUGLEVEL_DUMP_REQUESTS:
# Note that this only counts the actual GET and PUT bytes for the copy
# - not any transfers for doing wildcard expansion, the initial
# HEAD/GET request performed to get the object metadata, etc.
if self.total_bytes_transferred != 0:
self.logger.info(
'Total bytes copied=%d, total elapsed time=%5.3f secs (%sps)',
self.total_bytes_transferred, self.total_elapsed_time,
MakeHumanReadable(self.total_bytes_per_second))
if self.op_failure_count:
plural_str = 's' if self.op_failure_count > 1 else ''
raise CommandException('{count} file{pl}/object{pl} could '
'not be transferred.'.format(
count=self.op_failure_count, pl=plural_str))
return 0
def _ParseOpts(self):
# TODO: Arrange variables initialized here in alphabetical order.
perform_mv = False
# exclude_symlinks is handled by Command parent class, so save in Command
# state rather than CopyHelperOpts.
self.exclude_symlinks = False
no_clobber = False
# continue_on_error is handled by Command parent class, so save in Command
# state rather than CopyHelperOpts.
self.continue_on_error = False
daisy_chain = False
read_args_from_stdin = False
print_ver = False
use_manifest = False
preserve_acl = False
self.preserve_posix_attrs = False
canned_acl = None
# The canned flag is handled by a helper function in the parent
# Command class, so save it in Command state rather than CopyHelperOpts.
self.canned = None
self.all_versions = False
self.skip_unsupported_objects = False
# Files matching these extensions should be compressed.
# The gzip_encoded flag marks if the files should be compressed during
# the upload. The gzip_local flag marks if the files should be compressed
# before uploading. Files compressed prior to upload are stored
# compressed, while files compressed during the upload are stored
# uncompressed. These flags cannot be mixed.
gzip_encoded = False
gzip_local = False
gzip_arg_exts = None
gzip_arg_all = None
test_callback_file = None
dest_storage_class = None
self.use_stet = False
# self.recursion_requested initialized in command.py (so can be checked
# in parent class for all commands).
self.manifest = None
if self.sub_opts:
for o, a in self.sub_opts:
if o == '-a':
canned_acl = a
self.canned = True
elif o == '-A':
self.all_versions = True
elif o == '-c':
self.continue_on_error = True
elif o == '-D':
daisy_chain = True
elif o == '-e':
self.exclude_symlinks = True
elif o == '--testcallbackfile':
# File path of a pickled class that implements ProgressCallback.call.
# Used for testing transfer interruptions and resumes.
test_callback_file = a
elif o == '-I':
read_args_from_stdin = True
elif o == '-j':
gzip_encoded = True
gzip_arg_exts = [x.strip() for x in a.split(',')]
elif o == '-J':
gzip_encoded = True
gzip_arg_all = GZIP_ALL_FILES
elif o == '-L':
use_manifest = True
self.manifest = Manifest(a)
elif o == '-M':
# Note that we signal to the cp command to perform a move (copy
# followed by remove) and use directory-move naming rules by passing
# the undocumented (for internal use) -M option when running the cp
# command from mv.py.
perform_mv = True
elif o == '-n':
no_clobber = True
elif o == '-p':
preserve_acl = True
elif o == '-P':
self.preserve_posix_attrs = True
InitializePreservePosixData()
elif o == '-r' or o == '-R':
self.recursion_requested = True
elif o == '-s':
dest_storage_class = NormalizeStorageClass(a)
elif o == '-U':
self.skip_unsupported_objects = True
elif o == '-v':
print_ver = True
elif o == '-z':
gzip_local = True
gzip_arg_exts = [x.strip() for x in a.split(',')]
elif o == '-Z':
gzip_local = True
gzip_arg_all = GZIP_ALL_FILES
elif o == '--stet':
self.use_stet = True
if preserve_acl and canned_acl:
raise CommandException(
'Specifying both the -p and -a options together is invalid.')
if self.all_versions and self.parallel_operations:
raise CommandException(
'The gsutil -m option is not supported with the cp -A flag, to '
'ensure that object version ordering is preserved. Please re-run '
'the command without the -m option.')
if gzip_encoded and gzip_local:
raise CommandException(
'Specifying both the -j/-J and -z/-Z options together is invalid.')
if gzip_arg_exts and gzip_arg_all:
if gzip_encoded:
raise CommandException(
'Specifying both the -j and -J options together is invalid.')
else:
raise CommandException(
'Specifying both the -z and -Z options together is invalid.')
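# At this point at most one of gzip_arg_exts/gzip_arg_all is set, so
# gzip_exts holds either a list of extensions (-j/-z) or the GZIP_ALL_FILES
# sentinel (-J/-Z).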
self.gzip_exts = gzip_arg_exts or gzip_arg_all
self.gzip_encoded = gzip_encoded
return CreateCopyHelperOpts(
perform_mv=perform_mv,
no_clobber=no_clobber,
daisy_chain=daisy_chain,
read_args_from_stdin=read_args_from_stdin,
print_ver=print_ver,
use_manifest=use_manifest,
preserve_acl=preserve_acl,
canned_acl=canned_acl,
skip_unsupported_objects=self.skip_unsupported_objects,
test_callback_file=test_callback_file,
dest_storage_class=dest_storage_class)