def _ValidateAndCompleteDownload()

in gslib/utils/copy_helper.py


def _ValidateAndCompleteDownload(logger,
                                 src_url,
                                 src_obj_metadata,
                                 dst_url,
                                 need_to_unzip,
                                 server_gzip,
                                 digesters,
                                 hash_algs,
                                 temporary_file_name,
                                 api_selector,
                                 bytes_transferred,
                                 gsutil_api,
                                 is_rsync=False,
                                 preserve_posix=False,
                                 use_stet=False):
  """Validates and performs necessary operations on a downloaded file.

  Validates the integrity of the downloaded file using hash_algs. If the file
  was compressed (temporarily), the file will be decompressed. Then, if the
  integrity of the file was successfully validated, the file will be moved
  from its temporary download location to its permanent location on disk.

  Args:
    logger: For outputting log messages.
    src_url: StorageUrl for the source object.
    src_obj_metadata: Metadata for the source object, potentially containing
                      hash values.
    dst_url: StorageUrl describing the destination file.
    need_to_unzip: If true, a temporary gzipped file was used and must be
                   decompressed as part of validation.
    server_gzip: If true, the server gzipped the bytes (regardless of whether
                 the object metadata claimed it was gzipped).
    digesters: dict of {string, hash digester} that contains up-to-date digests
               computed during the download. If a digester for a particular
               algorithm is None, an up-to-date digest is not available and the
               hash must be recomputed from the local file.
    hash_algs: dict of {string, hash algorithm} that can be used if digesters
               don't have up-to-date digests.
    temporary_file_name: Temporary file name that was used for download.
    api_selector: The Cloud API implementation used (used for tracker file
                  naming).
    bytes_transferred: Number of bytes downloaded (used for logging).
    gsutil_api: Cloud API to use for service and status.
    is_rsync: Whether or not the caller is the rsync function. Used to determine
              if timeCreated should be used.
    preserve_posix: Whether or not to preserve the POSIX attributes.
    use_stet: If True, attempt to decrypt downloaded files with the STET
              binary if it's present on the system.

  Returns:
    An MD5 of the local file, if one was calculated as part of the integrity
    check.
  """
  final_file_name = dst_url.object_name
  digesters_succeeded = True

  for alg in digesters:
    # If we get a digester with a None algorithm, the underlying
    # implementation failed to calculate a digest, so we will need to
    # calculate one from scratch.
    if not digesters[alg]:
      digesters_succeeded = False
      break

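  # Prefer digests computed incrementally during the download; otherwise
  # fall back to re-reading the temporary file from disk.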
  if digesters_succeeded:
    local_hashes = _CreateDigestsFromDigesters(digesters)
  else:
    local_hashes = _CreateDigestsFromLocalFile(gsutil_api.status_queue,
                                               hash_algs, temporary_file_name,
                                               src_url, src_obj_metadata)

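  # First-pass verification. For gzip-transcoded downloads the local digest
  # reflects the compressed bytes, so a mismatch here may be deferred and
  # re-checked after decompression.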
  digest_verified = True
  hash_invalid_exception = None
  try:
    _CheckHashes(logger, src_url, src_obj_metadata, final_file_name,
                 local_hashes)
    DeleteDownloadTrackerFiles(dst_url, api_selector)
  except HashMismatchException as e:
    # If a non-gzipped object gets sent with gzip content encoding, the hash
    # we calculate will match the gzipped bytes, not the original object. Thus,
    # we'll need to calculate and check it after unzipping.
    if server_gzip:
      logger.debug('Hash did not match but server gzipped the content, will '
                   'recalculate.')
      digest_verified = False
    elif api_selector == ApiSelector.XML:
      logger.debug(
          'Hash did not match but server may have gzipped the content, will '
          'recalculate.')
      # Save off the exception in case this isn't a gzipped file.
      hash_invalid_exception = e
      digest_verified = False
    else:
      DeleteDownloadTrackerFiles(dst_url, api_selector)
      if _RENAME_ON_HASH_MISMATCH:
        os.rename(temporary_file_name,
                  final_file_name + _RENAME_ON_HASH_MISMATCH_SUFFIX)
      else:
        os.unlink(temporary_file_name)
      raise

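  # Decompress if we downloaded to a gzipped temporary file or the server
  # gzip-transcoded the bytes in transit.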
  if not (need_to_unzip or server_gzip):
    unzipped_temporary_file_name = temporary_file_name
  else:
    # This will not result in the same string as temporary_file_name b/c
    # GetTempFileName returns ".gstmp" and gzipped temp files have ".gztmp".
    unzipped_temporary_file_name = temporary_file_util.GetTempFileName(dst_url)
    # Log that we're uncompressing if the file is big enough that
    # decompressing would make it look like the transfer "stalled" at the end.
    if bytes_transferred > TEN_MIB:
      logger.info('Uncompressing temporarily gzipped file to %s...',
                  final_file_name)

    gzip_fp = None
    try:
      # Downloaded temporarily gzipped file, unzip to file without '_.gztmp'
      # suffix.
      gzip_fp = gzip.open(temporary_file_name, 'rb')
      with open(unzipped_temporary_file_name, 'wb') as f_out:
        data = gzip_fp.read(GZIP_CHUNK_SIZE)
        while data:
          f_out.write(data)
          data = gzip_fp.read(GZIP_CHUNK_SIZE)
    except IOError as e:
      # In the XML case where we don't know if the file was gzipped, raise
      # the original hash exception if we find that it wasn't.
      if 'Not a gzipped file' in str(e) and hash_invalid_exception:
        # Linter improperly thinks we're raising None despite the above check.
        # pylint: disable=raising-bad-type
        raise hash_invalid_exception
    finally:
      if gzip_fp:
        gzip_fp.close()

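    # The decompressed copy is complete; the gzipped temporary file is no
    # longer needed.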
    os.unlink(temporary_file_name)

  if not digest_verified:
    try:
      # Recalculate hashes on the unzipped local file.
      local_hashes = _CreateDigestsFromLocalFile(gsutil_api.status_queue,
                                                 hash_algs,
                                                 unzipped_temporary_file_name,
                                                 src_url, src_obj_metadata)
      _CheckHashes(logger, src_url, src_obj_metadata, final_file_name,
                   local_hashes)
      DeleteDownloadTrackerFiles(dst_url, api_selector)
    except HashMismatchException:
      DeleteDownloadTrackerFiles(dst_url, api_selector)
      if _RENAME_ON_HASH_MISMATCH:
        os.rename(
            unzipped_temporary_file_name,
            unzipped_temporary_file_name + _RENAME_ON_HASH_MISMATCH_SUFFIX)
      else:
        os.unlink(unzipped_temporary_file_name)
      raise

  if use_stet:
    # Decrypt data using STET binary.
    stet_util.decrypt_download(src_url, dst_url, unzipped_temporary_file_name,
                               logger)

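  # All validation and post-processing succeeded; move the file from its
  # temporary name to its final destination.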
  os.rename(unzipped_temporary_file_name, final_file_name)
  ParseAndSetPOSIXAttributes(final_file_name,
                             src_obj_metadata,
                             is_rsync=is_rsync,
                             preserve_posix=preserve_posix)

  if 'md5' in local_hashes:
    return local_hashes['md5']