in gslib/utils/copy_helper.py
def _ValidateAndCompleteDownload(logger,
src_url,
src_obj_metadata,
dst_url,
need_to_unzip,
server_gzip,
digesters,
hash_algs,
temporary_file_name,
api_selector,
bytes_transferred,
gsutil_api,
is_rsync=False,
preserve_posix=False,
use_stet=False):
"""Validates and performs necessary operations on a downloaded file.
Validates the integrity of the downloaded file using hash_algs. If the file
was compressed (temporarily), the file will be decompressed. Then, if the
integrity of the file was successfully validated, the file will be moved
  from its temporary download location to its permanent location on disk.

  Args:
logger: For outputting log messages.
src_url: StorageUrl for the source object.
src_obj_metadata: Metadata for the source object, potentially containing
hash values.
dst_url: StorageUrl describing the destination file.
need_to_unzip: If true, a temporary zip file was used and must be
uncompressed as part of validation.
server_gzip: If true, the server gzipped the bytes (regardless of whether
the object metadata claimed it was gzipped).
digesters: dict of {string, hash digester} that contains up-to-date digests
computed during the download. If a digester for a particular
algorithm is None, an up-to-date digest is not available and the
hash must be recomputed from the local file.
hash_algs: dict of {string, hash algorithm} that can be used if digesters
don't have up-to-date digests.
temporary_file_name: Temporary file name that was used for download.
    api_selector: The Cloud API implementation used (used for tracker file
        naming).
bytes_transferred: Number of bytes downloaded (used for logging).
gsutil_api: Cloud API to use for service and status.
    is_rsync: Whether or not the caller is the rsync command. Used to
        determine whether timeCreated should be used.
    preserve_posix: Whether or not to preserve the POSIX attributes.
use_stet: If True, attempt to decrypt downloaded files with the STET
binary if it's present on the system.

  Returns:
An MD5 of the local file, if one was calculated as part of the integrity
check.
"""
final_file_name = dst_url.object_name
digesters_succeeded = True
for alg in digesters:
    # If any digester is None, the underlying implementation failed to
    # calculate a digest for that algorithm, so we will need to calculate
    # one from scratch.
if not digesters[alg]:
digesters_succeeded = False
break
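  # Prefer the digests computed on the fly during the download; fall back to
  # re-reading the temporary file only if a digester failed.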
if digesters_succeeded:
local_hashes = _CreateDigestsFromDigesters(digesters)
else:
local_hashes = _CreateDigestsFromLocalFile(gsutil_api.status_queue,
hash_algs, temporary_file_name,
src_url, src_obj_metadata)
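  # Gzipped content can't be checked against the object's hashes until after
  # decompression, so track whether verification has actually happened yet.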
digest_verified = True
hash_invalid_exception = None
try:
_CheckHashes(logger, src_url, src_obj_metadata, final_file_name,
local_hashes)
DeleteDownloadTrackerFiles(dst_url, api_selector)
except HashMismatchException as e:
    # If a non-gzipped object gets sent with gzip content encoding, the hash
    # we calculate will match the gzipped bytes, not the original object.
    # Thus, we'll need to calculate and check it after unzipping.
if server_gzip:
logger.debug('Hash did not match but server gzipped the content, will '
'recalculate.')
digest_verified = False
elif api_selector == ApiSelector.XML:
logger.debug(
'Hash did not match but server may have gzipped the content, will '
'recalculate.')
# Save off the exception in case this isn't a gzipped file.
hash_invalid_exception = e
digest_verified = False
else:
DeleteDownloadTrackerFiles(dst_url, api_selector)
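      # Unrecoverable mismatch: quarantine the corrupt temporary file (or
      # delete it), then re-raise the hash exception.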
if _RENAME_ON_HASH_MISMATCH:
os.rename(temporary_file_name,
final_file_name + _RENAME_ON_HASH_MISMATCH_SUFFIX)
else:
os.unlink(temporary_file_name)
raise
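  # If no decompression is needed, the downloaded temporary file already
  # holds the final bytes.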
if not (need_to_unzip or server_gzip):
unzipped_temporary_file_name = temporary_file_name
else:
    # This will not result in the same string as temporary_file_name because
    # GetTempFileName returns ".gstmp" and gzipped temp files have ".gztmp".
unzipped_temporary_file_name = temporary_file_util.GetTempFileName(dst_url)
# Log that we're uncompressing if the file is big enough that
# decompressing would make it look like the transfer "stalled" at the end.
if bytes_transferred > TEN_MIB:
logger.info('Uncompressing temporarily gzipped file to %s...',
final_file_name)
gzip_fp = None
try:
      # The download produced a temporarily gzipped file; unzip it to a file
      # without the '_.gztmp' suffix.
gzip_fp = gzip.open(temporary_file_name, 'rb')
with open(unzipped_temporary_file_name, 'wb') as f_out:
data = gzip_fp.read(GZIP_CHUNK_SIZE)
while data:
f_out.write(data)
data = gzip_fp.read(GZIP_CHUNK_SIZE)
except IOError as e:
# In the XML case where we don't know if the file was gzipped, raise
# the original hash exception if we find that it wasn't.
if 'Not a gzipped file' in str(e) and hash_invalid_exception:
# Linter improperly thinks we're raising None despite the above check.
# pylint: disable=raising-bad-type
raise hash_invalid_exception
finally:
if gzip_fp:
gzip_fp.close()
os.unlink(temporary_file_name)
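  # For server-gzipped (or possibly gzipped XML API) downloads, the earlier
  # hash check was skipped; verify the decompressed bytes now.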
if not digest_verified:
try:
# Recalculate hashes on the unzipped local file.
local_hashes = _CreateDigestsFromLocalFile(gsutil_api.status_queue,
hash_algs,
unzipped_temporary_file_name,
src_url, src_obj_metadata)
_CheckHashes(logger, src_url, src_obj_metadata, final_file_name,
local_hashes)
DeleteDownloadTrackerFiles(dst_url, api_selector)
except HashMismatchException:
DeleteDownloadTrackerFiles(dst_url, api_selector)
if _RENAME_ON_HASH_MISMATCH:
os.rename(
unzipped_temporary_file_name,
unzipped_temporary_file_name + _RENAME_ON_HASH_MISMATCH_SUFFIX)
else:
os.unlink(unzipped_temporary_file_name)
raise
if use_stet:
# Decrypt data using STET binary.
stet_util.decrypt_download(src_url, dst_url, unzipped_temporary_file_name,
logger)
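  # All validation (and any decompression/decryption) succeeded; move the
  # file from its temporary location to its permanent one.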
os.rename(unzipped_temporary_file_name, final_file_name)
ParseAndSetPOSIXAttributes(final_file_name,
src_obj_metadata,
is_rsync=is_rsync,
preserve_posix=preserve_posix)
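  # Return the local MD5 digest, if one was calculated as part of the
  # integrity check.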
if 'md5' in local_hashes:
return local_hashes['md5']
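

# --- Illustrative sketch (not part of copy_helper.py) ---
# A minimal, hedged example of the validate-then-promote pattern the function
# above implements: hash a temporary download in chunks, compare the digests
# against expected values, and only rename the file into place once validation
# succeeds. All names here (_validate_and_promote, download_path, final_path,
# expected_hashes) are hypothetical illustrations, not gsutil APIs.
import hashlib
import os


def _validate_and_promote(download_path, final_path, expected_hashes):
  """Moves download_path to final_path only if its hashes match.

  Args:
    download_path: Temporary file holding the downloaded bytes.
    final_path: Permanent location for the validated file.
    expected_hashes: dict of {hashlib algorithm name: expected hex digest}.

  Returns:
    The local MD5 hex digest, if one was calculated.
  """
  local_hashes = {}
  for alg in expected_hashes:
    digester = hashlib.new(alg)
    with open(download_path, 'rb') as fp:
      # Hash in chunks so large downloads don't need to fit in memory.
      for chunk in iter(lambda: fp.read(8192), b''):
        digester.update(chunk)
    local_hashes[alg] = digester.hexdigest()
  for alg, expected in expected_hashes.items():
    if local_hashes[alg] != expected:
      # As in copy_helper, never let bytes that failed validation occupy the
      # destination path.
      os.unlink(download_path)
      raise ValueError('%s mismatch: got %s, expected %s' %
                       (alg, local_hashes[alg], expected))
  os.rename(download_path, final_path)
  return local_hashes.get('md5')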