in azure/multiapi/storage/v2018_11_09/blob/baseblobservice.py [0:0]
def get_blob_to_stream(
        self, container_name, blob_name, stream, snapshot=None,
        start_range=None, end_range=None, validate_content=False,
        progress_callback=None, max_connections=2, lease_id=None,
        if_modified_since=None, if_unmodified_since=None, if_match=None,
        if_none_match=None, timeout=None):
    '''
    Downloads a blob to a stream, with automatic chunking and progress
    notifications. Returns an instance of :class:`~azure.storage.blob.models.Blob` with
    properties and metadata.

    :param str container_name:
        Name of existing container.
    :param str blob_name:
        Name of existing blob.
    :param io.IOBase stream:
        Opened stream to write to.
    :param str snapshot:
        The snapshot parameter is an opaque DateTime value that,
        when present, specifies the blob snapshot to retrieve.
    :param int start_range:
        Start of byte range to use for downloading a section of the blob.
        If no end_range is given, all bytes after the start_range will be downloaded.
        The start_range and end_range params are inclusive.
        Ex: start_range=0, end_range=511 will download first 512 bytes of blob.
    :param int end_range:
        End of byte range to use for downloading a section of the blob.
        If end_range is given, start_range must be provided.
        The start_range and end_range params are inclusive.
        Ex: start_range=0, end_range=511 will download first 512 bytes of blob.
    :param bool validate_content:
        If set to true, validates an MD5 hash for each retrieved portion of
        the blob. This is primarily valuable for detecting bitflips on the wire
        if using http instead of https as https (the default) will already
        validate. Note that the service will only return transactional MD5s
        for chunks 4MB or less so the first get request will be of size
        self.MAX_CHUNK_GET_SIZE instead of self.MAX_SINGLE_GET_SIZE. If
        self.MAX_CHUNK_GET_SIZE was set to greater than 4MB an error will be
        thrown. As computing the MD5 takes processing time and more requests
        will need to be done due to the reduced chunk size there may be some
        increase in latency.
    :param progress_callback:
        Callback for progress with signature function(current, total)
        where current is the number of bytes transferred so far, and total is
        the size of the blob if known.
    :type progress_callback: func(current, total)
    :param int max_connections:
        If set to 2 or greater, an initial get will be done for the first
        self.MAX_SINGLE_GET_SIZE bytes of the blob. If this is the entire blob,
        the method returns at this point. If it is not, it will download the
        remaining data in parallel using the number of threads equal to
        max_connections. Each chunk will be of size self.MAX_CHUNK_GET_SIZE.
        If set to 1, a single large get request will be done. This is not
        generally recommended but available if very few threads should be
        used, network requests are very expensive, or a non-seekable stream
        prevents parallel download. This may also be useful if many blobs are
        expected to be empty as an extra request is required for empty blobs
        if max_connections is greater than 1.
    :param str lease_id:
        Required if the blob has an active lease.
    :param datetime if_modified_since:
        A DateTime value. Azure expects the date value passed in to be UTC.
        If timezone is included, any non-UTC datetimes will be converted to UTC.
        If a date is passed in without timezone info, it is assumed to be UTC.
        Specify this header to perform the operation only
        if the resource has been modified since the specified time.
    :param datetime if_unmodified_since:
        A DateTime value. Azure expects the date value passed in to be UTC.
        If timezone is included, any non-UTC datetimes will be converted to UTC.
        If a date is passed in without timezone info, it is assumed to be UTC.
        Specify this header to perform the operation only if
        the resource has not been modified since the specified date/time.
    :param str if_match:
        An ETag value, or the wildcard character (*). Specify this header to perform
        the operation only if the resource's ETag matches the value specified.
    :param str if_none_match:
        An ETag value, or the wildcard character (*). Specify this header
        to perform the operation only if the resource's ETag does not match
        the value specified. Specify the wildcard character (*) to perform
        the operation only if the resource does not exist, and fail the
        operation if it does exist.
    :param int timeout:
        The timeout parameter is expressed in seconds. This method may make
        multiple calls to the Azure service and the timeout will apply to
        each call individually.
    :return: A Blob with properties and metadata. If max_connections is greater
        than 1, the content_md5 (if set on the blob) will not be returned. If you
        require this value, either use get_blob_properties or set max_connections
        to 1.
    :rtype: :class:`~azure.storage.blob.models.Blob`
    '''
    _validate_not_none('container_name', container_name)
    _validate_not_none('blob_name', blob_name)
    _validate_not_none('stream', stream)

    # An end_range without a start_range is ambiguous; require both.
    if end_range is not None:
        _validate_not_none("start_range", start_range)

    # the stream must be seekable if parallel download is required
    if max_connections > 1:
        if sys.version_info >= (3,) and not stream.seekable():
            raise ValueError(_ERROR_PARALLEL_NOT_SEEKABLE)
        try:
            # Python 2 streams lack seekable(); probe with a no-op seek instead.
            stream.seek(stream.tell())
        except (NotImplementedError, AttributeError):
            raise ValueError(_ERROR_PARALLEL_NOT_SEEKABLE)

    # The service only provides transactional MD5s for chunks under 4MB.
    # If validate_content is on, get only self.MAX_CHUNK_GET_SIZE for the first
    # chunk so a transactional MD5 can be retrieved.
    first_get_size = self.MAX_SINGLE_GET_SIZE if not validate_content else self.MAX_CHUNK_GET_SIZE

    # Compute the byte range of the initial GET. The requested end is used
    # when it falls within the first chunk; otherwise the first chunk's last
    # byte. The range is inclusive, hence the -1.
    initial_request_start = start_range if start_range is not None else 0
    if end_range is not None and end_range - start_range < first_get_size:
        initial_request_end = end_range
    else:
        initial_request_end = initial_request_start + first_get_size - 1

    # Send a context object to make sure we always retry to the initial location
    operation_context = _OperationContext(location_lock=True)
    try:
        blob = self._get_blob(container_name,
                              blob_name,
                              snapshot,
                              start_range=initial_request_start,
                              end_range=initial_request_end,
                              validate_content=validate_content,
                              lease_id=lease_id,
                              if_modified_since=if_modified_since,
                              if_unmodified_since=if_unmodified_since,
                              if_match=if_match,
                              if_none_match=if_none_match,
                              timeout=timeout,
                              _context=operation_context)

        # Parse the total blob size and adjust the download size if ranges
        # were specified
        blob_size = _parse_length_from_content_range(blob.properties.content_range)
        if end_range is not None:
            # Use the end_range unless it is over the end of the blob
            download_size = min(blob_size, end_range - start_range + 1)
        elif start_range is not None:
            download_size = blob_size - start_range
        else:
            download_size = blob_size
    except AzureHttpError as ex:
        if start_range is None and ex.status_code == 416:
            # Get range will fail on an empty blob. If the user did not
            # request a range, do a regular get request in order to get
            # any properties.
            blob = self._get_blob(container_name,
                                  blob_name,
                                  snapshot,
                                  validate_content=validate_content,
                                  lease_id=lease_id,
                                  if_modified_since=if_modified_since,
                                  if_unmodified_since=if_unmodified_since,
                                  if_match=if_match,
                                  if_none_match=if_none_match,
                                  timeout=timeout,
                                  _context=operation_context)

            # Set the download size to empty
            download_size = 0
        else:
            raise ex

    # Mark the first progress chunk. If the blob is small or this is a single
    # shot download, this is the only call
    if progress_callback:
        progress_callback(blob.properties.content_length, download_size)

    # Write the content to the user stream
    # Clear blob content since output has been written to user stream
    if blob.content is not None:
        stream.write(blob.content)
        blob.content = None

    # If the blob is small, the download is complete at this point.
    # If blob size is large, download the rest of the blob in chunks.
    if blob.properties.content_length != download_size:
        # Lock on the etag. This can be overridden by the user by specifying '*'.
        # Locking prevents mixing data from different blob versions if the blob
        # is overwritten mid-download.
        if_match = if_match if if_match is not None else blob.properties.etag

        end_blob = blob_size
        if end_range is not None:
            # Use the end_range unless it is over the end of the blob
            end_blob = min(blob_size, end_range + 1)

        _download_blob_chunks(
            self,
            container_name,
            blob_name,
            snapshot,
            download_size,
            self.MAX_CHUNK_GET_SIZE,
            first_get_size,
            initial_request_end + 1,  # start where the first download ended
            end_blob,
            stream,
            max_connections,
            progress_callback,
            validate_content,
            lease_id,
            if_modified_since,
            if_unmodified_since,
            if_match,
            if_none_match,
            timeout,
            operation_context
        )

        # Set the content length to the download size instead of the size of
        # the last range
        blob.properties.content_length = download_size

        # Overwrite the content range to the user requested range
        blob.properties.content_range = 'bytes {0}-{1}/{2}'.format(start_range, end_range, blob_size)

        # Overwrite the content MD5 as it is the MD5 for the last range instead
        # of the stored MD5
        # TODO: Set to the stored MD5 when the service returns this
        blob.properties.content_md5 = None

    return blob