in azure/multiapi/storage/v2018_11_09/blob/baseblobservice.py [0:0]
def get_blob_to_stream(
        self, container_name, blob_name, stream, snapshot=None,
        start_range=None, end_range=None, validate_content=False,
        progress_callback=None, max_connections=2, lease_id=None,
        if_modified_since=None, if_unmodified_since=None, if_match=None,
        if_none_match=None, timeout=None):
    '''
    Downloads a blob to a stream, with automatic chunking and progress
    notifications. Returns an instance of :class:`~azure.storage.blob.models.Blob` with
    properties and metadata.

    :param str container_name:
        Name of existing container.
    :param str blob_name:
        Name of existing blob.
    :param io.IOBase stream:
        Opened stream to write to.
    :param str snapshot:
        The snapshot parameter is an opaque DateTime value that,
        when present, specifies the blob snapshot to retrieve.
    :param int start_range:
        Start of byte range to use for downloading a section of the blob.
        If no end_range is given, all bytes after the start_range will be downloaded.
        The start_range and end_range params are inclusive.
        Ex: start_range=0, end_range=511 will download first 512 bytes of blob.
    :param int end_range:
        End of byte range to use for downloading a section of the blob.
        If end_range is given, start_range must be provided.
        The start_range and end_range params are inclusive.
        Ex: start_range=0, end_range=511 will download first 512 bytes of blob.
    :param bool validate_content:
        If set to true, validates an MD5 hash for each retrieved portion of
        the blob. This is primarily valuable for detecting bitflips on the wire
        if using http instead of https as https (the default) will already
        validate. Note that the service will only return transactional MD5s
        for chunks 4MB or less so the first get request will be of size
        self.MAX_CHUNK_GET_SIZE instead of self.MAX_SINGLE_GET_SIZE. If
        self.MAX_CHUNK_GET_SIZE was set to greater than 4MB an error will be
        thrown. As computing the MD5 takes processing time and more requests
        will need to be done due to the reduced chunk size there may be some
        increase in latency.
    :param progress_callback:
        Callback for progress with signature function(current, total)
        where current is the number of bytes transferred so far, and total is
        the size of the blob if known.
    :type progress_callback: func(current, total)
    :param int max_connections:
        If set to 2 or greater, an initial get will be done for the first
        self.MAX_SINGLE_GET_SIZE bytes of the blob. If this is the entire blob,
        the method returns at this point. If it is not, it will download the
        remaining data in parallel using the number of threads equal to
        max_connections. Each chunk will be of size self.MAX_CHUNK_GET_SIZE.
        If set to 1, a single large get request will be done. This is not
        generally recommended but available if very few threads should be
        used, network requests are very expensive, or a non-seekable stream
        prevents parallel download. This may also be useful if many blobs are
        expected to be empty as an extra request is required for empty blobs
        if max_connections is greater than 1.
    :param str lease_id:
        Required if the blob has an active lease.
    :param datetime if_modified_since:
        A DateTime value. Azure expects the date value passed in to be UTC.
        If timezone is included, any non-UTC datetimes will be converted to UTC.
        If a date is passed in without timezone info, it is assumed to be UTC.
        Specify this header to perform the operation only
        if the resource has been modified since the specified time.
    :param datetime if_unmodified_since:
        A DateTime value. Azure expects the date value passed in to be UTC.
        If timezone is included, any non-UTC datetimes will be converted to UTC.
        If a date is passed in without timezone info, it is assumed to be UTC.
        Specify this header to perform the operation only if
        the resource has not been modified since the specified date/time.
    :param str if_match:
        An ETag value, or the wildcard character (*). Specify this header to perform
        the operation only if the resource's ETag matches the value specified.
    :param str if_none_match:
        An ETag value, or the wildcard character (*). Specify this header
        to perform the operation only if the resource's ETag does not match
        the value specified. Specify the wildcard character (*) to perform
        the operation only if the resource does not exist, and fail the
        operation if it does exist.
    :param int timeout:
        The timeout parameter is expressed in seconds. This method may make
        multiple calls to the Azure service and the timeout will apply to
        each call individually.
    :return: A Blob with properties and metadata. If max_connections is greater
        than 1, the content_md5 (if set on the blob) will not be returned. If you
        require this value, either use get_blob_properties or set max_connections
        to 1.
    :rtype: :class:`~azure.storage.blob.models.Blob`
    '''
    _validate_not_none('container_name', container_name)
    _validate_not_none('blob_name', blob_name)
    _validate_not_none('stream', stream)

    # An end_range without a start_range is ambiguous; require both.
    if end_range is not None:
        _validate_not_none("start_range", start_range)

    # the stream must be seekable if parallel download is required
    if max_connections > 1:
        if sys.version_info >= (3,) and not stream.seekable():
            raise ValueError(_ERROR_PARALLEL_NOT_SEEKABLE)
        try:
            # Python 2 streams lack seekable(); probe with a no-op seek instead.
            stream.seek(stream.tell())
        except (NotImplementedError, AttributeError):
            raise ValueError(_ERROR_PARALLEL_NOT_SEEKABLE)

    # The service only provides transactional MD5s for chunks under 4MB.
    # If validate_content is on, get only self.MAX_CHUNK_GET_SIZE for the first
    # chunk so a transactional MD5 can be retrieved.
    first_get_size = self.MAX_SINGLE_GET_SIZE if not validate_content else self.MAX_CHUNK_GET_SIZE

    # Compute the byte range of the initial GET. The requested end is used
    # when it falls within the first chunk; otherwise the first chunk's last
    # byte. The range is inclusive, hence the -1.
    initial_request_start = start_range if start_range is not None else 0
    if end_range is not None and end_range - start_range < first_get_size:
        initial_request_end = end_range
    else:
        initial_request_end = initial_request_start + first_get_size - 1

    # Send a context object to make sure we always retry to the initial location
    operation_context = _OperationContext(location_lock=True)
    try:
        blob = self._get_blob(container_name,
                              blob_name,
                              snapshot,
                              start_range=initial_request_start,
                              end_range=initial_request_end,
                              validate_content=validate_content,
                              lease_id=lease_id,
                              if_modified_since=if_modified_since,
                              if_unmodified_since=if_unmodified_since,
                              if_match=if_match,
                              if_none_match=if_none_match,
                              timeout=timeout,
                              _context=operation_context)

        # Parse the total blob size and adjust the download size if ranges
        # were specified
        blob_size = _parse_length_from_content_range(blob.properties.content_range)
        if end_range is not None:
            # Use the end_range unless it is over the end of the blob
            download_size = min(blob_size, end_range - start_range + 1)
        elif start_range is not None:
            download_size = blob_size - start_range
        else:
            download_size = blob_size
    except AzureHttpError as ex:
        if start_range is None and ex.status_code == 416:
            # Get range will fail on an empty blob. If the user did not
            # request a range, do a regular get request in order to get
            # any properties.
            blob = self._get_blob(container_name,
                                  blob_name,
                                  snapshot,
                                  validate_content=validate_content,
                                  lease_id=lease_id,
                                  if_modified_since=if_modified_since,
                                  if_unmodified_since=if_unmodified_since,
                                  if_match=if_match,
                                  if_none_match=if_none_match,
                                  timeout=timeout,
                                  _context=operation_context)

            # Set the download size to empty
            download_size = 0
        else:
            raise ex

    # Mark the first progress chunk. If the blob is small or this is a single
    # shot download, this is the only call
    if progress_callback:
        progress_callback(blob.properties.content_length, download_size)

    # Write the content to the user stream
    # Clear blob content since output has been written to user stream
    if blob.content is not None:
        stream.write(blob.content)
        blob.content = None

    # If the blob is small, the download is complete at this point.
    # If blob size is large, download the rest of the blob in chunks.
    if blob.properties.content_length != download_size:
        # Lock on the etag. This can be overridden by the user by specifying '*'.
        # Locking prevents mixing data from different blob versions if the blob
        # is overwritten mid-download.
        if_match = if_match if if_match is not None else blob.properties.etag

        end_blob = blob_size
        if end_range is not None:
            # Use the end_range unless it is over the end of the blob
            end_blob = min(blob_size, end_range + 1)

        _download_blob_chunks(
            self,
            container_name,
            blob_name,
            snapshot,
            download_size,
            self.MAX_CHUNK_GET_SIZE,
            first_get_size,
            initial_request_end + 1,  # start where the first download ended
            end_blob,
            stream,
            max_connections,
            progress_callback,
            validate_content,
            lease_id,
            if_modified_since,
            if_unmodified_since,
            if_match,
            if_none_match,
            timeout,
            operation_context
        )

        # Set the content length to the download size instead of the size of
        # the last range
        blob.properties.content_length = download_size

        # Overwrite the content range to the user requested range
        blob.properties.content_range = 'bytes {0}-{1}/{2}'.format(start_range, end_range, blob_size)

        # Overwrite the content MD5 as it is the MD5 for the last range instead
        # of the stored MD5
        # TODO: Set to the stored MD5 when the service returns this
        blob.properties.content_md5 = None

    return blob