def get_file_to

def get_file_to_stream()

in azure/multiapi/storage/v2018_11_09/file/fileservice.py [0:0]
144 lines of code
26 McCabe index (conditional complexity)

    def get_file_to_stream(
            self, share_name, directory_name, file_name, stream,
            start_range=None, end_range=None, validate_content=False,
            progress_callback=None, max_connections=2, timeout=None, snapshot=None):
        '''
        Downloads a file to a stream, with automatic chunking and progress
        notifications. Returns an instance of :class:`~azure.storage.file.models.File` with properties
        and metadata.

        :param str share_name:
            Name of existing share.
        :param str directory_name:
            The path to the directory.
        :param str file_name:
            Name of existing file.
        :param io.IOBase stream:
            Opened file/stream to write to.
        :param int start_range:
            Start of byte range to use for downloading a section of the file.
            If no end_range is given, all bytes after the start_range will be downloaded.
            The start_range and end_range params are inclusive.
            Ex: start_range=0, end_range=511 will download first 512 bytes of file.
        :param int end_range:
            End of byte range to use for downloading a section of the file.
            If end_range is given, start_range must be provided.
            The start_range and end_range params are inclusive.
            Ex: start_range=0, end_range=511 will download first 512 bytes of file.
        :param bool validate_content:
            If set to true, validates an MD5 hash for each retrieved portion of 
            the file. This is primarily valuable for detecting bitflips on the wire 
            if using http instead of https as https (the default) will already 
            validate. Note that the service will only return transactional MD5s 
            for chunks 4MB or less so the first get request will be of size 
            self.MAX_CHUNK_GET_SIZE instead of self.MAX_SINGLE_GET_SIZE. If 
            self.MAX_CHUNK_GET_SIZE was set to greater than 4MB an error will be 
            thrown. As computing the MD5 takes processing time and more requests 
            will need to be done due to the reduced chunk size there may be some 
            increase in latency.
        :param progress_callback:
            Callback for progress with signature function(current, total) 
            where current is the number of bytes transfered so far, and total is 
            the size of the file if known.
        :type progress_callback: func(current, total)
        :param int max_connections:
            If set to 2 or greater, an initial get will be done for the first 
            self.MAX_SINGLE_GET_SIZE bytes of the file. If this is the entire file, 
            the method returns at this point. If it is not, it will download the 
            remaining data parallel using the number of threads equal to 
            max_connections. Each chunk will be of size self.MAX_CHUNK_GET_SIZE.
            If set to 1, a single large get request will be done. This is not 
            generally recommended but available if very few threads should be 
            used, network requests are very expensive, or a non-seekable stream 
            prevents parallel download. This may also be valuable if the file is 
            being concurrently modified to enforce atomicity or if many files are 
            expected to be empty as an extra request is required for empty files 
            if max_connections is greater than 1.
        :param int timeout:
            The timeout parameter is expressed in seconds. This method may make 
            multiple calls to the Azure service and the timeout will apply to 
            each call individually.
        :param str snapshot:
            A string that represents the snapshot version, if applicable.
        :return: A File with properties and metadata.
        :rtype: :class:`~azure.storage.file.models.File`
        '''
        _validate_not_none('share_name', share_name)
        _validate_not_none('file_name', file_name)
        _validate_not_none('stream', stream)

        if end_range is not None:
            _validate_not_none("start_range", start_range)

        # the stream must be seekable if parallel download is required
        if max_connections > 1:
            if sys.version_info >= (3,) and not stream.seekable():
                raise ValueError(_ERROR_PARALLEL_NOT_SEEKABLE)
            else:
                try:
                    stream.seek(stream.tell())
                except (NotImplementedError, AttributeError):
                    raise ValueError(_ERROR_PARALLEL_NOT_SEEKABLE)

        # The service only provides transactional MD5s for chunks under 4MB.
        # If validate_content is on, get only self.MAX_CHUNK_GET_SIZE for the first
        # chunk so a transactional MD5 can be retrieved.
        first_get_size = self.MAX_SINGLE_GET_SIZE if not validate_content else self.MAX_CHUNK_GET_SIZE

        initial_request_start = start_range if start_range is not None else 0

        if end_range is not None and end_range - start_range < first_get_size:
            initial_request_end = end_range
        else:
            initial_request_end = initial_request_start + first_get_size - 1

        # Send a context object to make sure we always retry to the initial location
        operation_context = _OperationContext(location_lock=True)
        try:
            file = self._get_file(share_name,
                                  directory_name,
                                  file_name,
                                  start_range=initial_request_start,
                                  end_range=initial_request_end,
                                  validate_content=validate_content,
                                  timeout=timeout,
                                  _context=operation_context,
                                  snapshot=snapshot)

            # Parse the total file size and adjust the download size if ranges
            # were specified
            file_size = _parse_length_from_content_range(file.properties.content_range)
            if end_range is not None:
                # Use the end_range unless it is over the end of the file
                download_size = min(file_size, end_range - start_range + 1)
            elif start_range is not None:
                download_size = file_size - start_range
            else:
                download_size = file_size
        except AzureHttpError as ex:
            if start_range is None and ex.status_code == 416:
                # Get range will fail on an empty file. If the user did not
                # request a range, do a regular get request in order to get
                # any properties.
                file = self._get_file(share_name,
                                      directory_name,
                                      file_name,
                                      validate_content=validate_content,
                                      timeout=timeout,
                                      _context=operation_context,
                                      snapshot=snapshot)

                # Set the download size to empty
                download_size = 0
            else:
                raise ex

        # Mark the first progress chunk. If the file is small, this is the only call
        if progress_callback:
            progress_callback(file.properties.content_length, download_size)

        # Write the content to the user stream  
        # Clear file content since output has been written to user stream   
        if file.content is not None:
            stream.write(file.content)
            file.content = None

        # If the file is small, the download is complete at this point.
        # If file size is large, download the rest of the blob in chunks.
        if file.properties.content_length != download_size:
            # At this point we would like to lock on something like the etag so that
            # if the file is modified, we do not get a corrupted download. However,
            # this feature is not yet available on the file service.

            end_file = file_size
            if end_range is not None:
                # Use the end_range unless it is over the end of the file
                end_file = min(file_size, end_range + 1)

            _download_file_chunks(
                self,
                share_name,
                directory_name,
                file_name,
                download_size,
                self.MAX_CHUNK_GET_SIZE,
                first_get_size,
                initial_request_end + 1,  # start where the first download ended
                end_file,
                stream,
                max_connections,
                progress_callback,
                validate_content,
                timeout,
                operation_context,
                snapshot
            )

            # Set the content length to the download size instead of the size of 
            # the last range
            file.properties.content_length = download_size

            # Overwrite the content range to the user requested range
            file.properties.content_range = 'bytes {0}-{1}/{2}'.format(start_range, end_range, file_size)

            # Overwrite the content MD5 as it is the MD5 for the last range instead 
            # of the stored MD5
            # TODO: Set to the stored MD5 when the service returns this
            file.properties.content_md5 = None

        return file