in azure/multiapi/storage/v2018_11_09/file/fileservice.py [0:0]
def get_file_to_stream(
self, share_name, directory_name, file_name, stream,
start_range=None, end_range=None, validate_content=False,
progress_callback=None, max_connections=2, timeout=None, snapshot=None):
'''
Downloads a file to a stream, with automatic chunking and progress
notifications. Returns an instance of :class:`~azure.storage.file.models.File` with properties
and metadata.
:param str share_name:
Name of existing share.
:param str directory_name:
The path to the directory.
:param str file_name:
Name of existing file.
:param io.IOBase stream:
Opened file/stream to write to.
:param int start_range:
Start of byte range to use for downloading a section of the file.
If no end_range is given, all bytes after the start_range will be downloaded.
The start_range and end_range params are inclusive.
Ex: start_range=0, end_range=511 will download first 512 bytes of file.
:param int end_range:
End of byte range to use for downloading a section of the file.
If end_range is given, start_range must be provided.
The start_range and end_range params are inclusive.
Ex: start_range=0, end_range=511 will download first 512 bytes of file.
:param bool validate_content:
If set to true, validates an MD5 hash for each retrieved portion of
the file. This is primarily valuable for detecting bitflips on the wire
if using http instead of https as https (the default) will already
validate. Note that the service will only return transactional MD5s
for chunks 4MB or less so the first get request will be of size
self.MAX_CHUNK_GET_SIZE instead of self.MAX_SINGLE_GET_SIZE. If
self.MAX_CHUNK_GET_SIZE was set to greater than 4MB an error will be
thrown. As computing the MD5 takes processing time and more requests
will need to be done due to the reduced chunk size there may be some
increase in latency.
:param progress_callback:
Callback for progress with signature function(current, total)
where current is the number of bytes transfered so far, and total is
the size of the file if known.
:type progress_callback: func(current, total)
:param int max_connections:
If set to 2 or greater, an initial get will be done for the first
self.MAX_SINGLE_GET_SIZE bytes of the file. If this is the entire file,
the method returns at this point. If it is not, it will download the
remaining data parallel using the number of threads equal to
max_connections. Each chunk will be of size self.MAX_CHUNK_GET_SIZE.
If set to 1, a single large get request will be done. This is not
generally recommended but available if very few threads should be
used, network requests are very expensive, or a non-seekable stream
prevents parallel download. This may also be valuable if the file is
being concurrently modified to enforce atomicity or if many files are
expected to be empty as an extra request is required for empty files
if max_connections is greater than 1.
:param int timeout:
The timeout parameter is expressed in seconds. This method may make
multiple calls to the Azure service and the timeout will apply to
each call individually.
:param str snapshot:
A string that represents the snapshot version, if applicable.
:return: A File with properties and metadata.
:rtype: :class:`~azure.storage.file.models.File`
'''
_validate_not_none('share_name', share_name)
_validate_not_none('file_name', file_name)
_validate_not_none('stream', stream)
if end_range is not None:
_validate_not_none("start_range", start_range)
# the stream must be seekable if parallel download is required
if max_connections > 1:
if sys.version_info >= (3,) and not stream.seekable():
raise ValueError(_ERROR_PARALLEL_NOT_SEEKABLE)
else:
try:
stream.seek(stream.tell())
except (NotImplementedError, AttributeError):
raise ValueError(_ERROR_PARALLEL_NOT_SEEKABLE)
# The service only provides transactional MD5s for chunks under 4MB.
# If validate_content is on, get only self.MAX_CHUNK_GET_SIZE for the first
# chunk so a transactional MD5 can be retrieved.
first_get_size = self.MAX_SINGLE_GET_SIZE if not validate_content else self.MAX_CHUNK_GET_SIZE
initial_request_start = start_range if start_range is not None else 0
if end_range is not None and end_range - start_range < first_get_size:
initial_request_end = end_range
else:
initial_request_end = initial_request_start + first_get_size - 1
# Send a context object to make sure we always retry to the initial location
operation_context = _OperationContext(location_lock=True)
try:
file = self._get_file(share_name,
directory_name,
file_name,
start_range=initial_request_start,
end_range=initial_request_end,
validate_content=validate_content,
timeout=timeout,
_context=operation_context,
snapshot=snapshot)
# Parse the total file size and adjust the download size if ranges
# were specified
file_size = _parse_length_from_content_range(file.properties.content_range)
if end_range is not None:
# Use the end_range unless it is over the end of the file
download_size = min(file_size, end_range - start_range + 1)
elif start_range is not None:
download_size = file_size - start_range
else:
download_size = file_size
except AzureHttpError as ex:
if start_range is None and ex.status_code == 416:
# Get range will fail on an empty file. If the user did not
# request a range, do a regular get request in order to get
# any properties.
file = self._get_file(share_name,
directory_name,
file_name,
validate_content=validate_content,
timeout=timeout,
_context=operation_context,
snapshot=snapshot)
# Set the download size to empty
download_size = 0
else:
raise ex
# Mark the first progress chunk. If the file is small, this is the only call
if progress_callback:
progress_callback(file.properties.content_length, download_size)
# Write the content to the user stream
# Clear file content since output has been written to user stream
if file.content is not None:
stream.write(file.content)
file.content = None
# If the file is small, the download is complete at this point.
# If file size is large, download the rest of the blob in chunks.
if file.properties.content_length != download_size:
# At this point we would like to lock on something like the etag so that
# if the file is modified, we do not get a corrupted download. However,
# this feature is not yet available on the file service.
end_file = file_size
if end_range is not None:
# Use the end_range unless it is over the end of the file
end_file = min(file_size, end_range + 1)
_download_file_chunks(
self,
share_name,
directory_name,
file_name,
download_size,
self.MAX_CHUNK_GET_SIZE,
first_get_size,
initial_request_end + 1, # start where the first download ended
end_file,
stream,
max_connections,
progress_callback,
validate_content,
timeout,
operation_context,
snapshot
)
# Set the content length to the download size instead of the size of
# the last range
file.properties.content_length = download_size
# Overwrite the content range to the user requested range
file.properties.content_range = 'bytes {0}-{1}/{2}'.format(start_range, end_range, file_size)
# Overwrite the content MD5 as it is the MD5 for the last range instead
# of the stored MD5
# TODO: Set to the stored MD5 when the service returns this
file.properties.content_md5 = None
return file