in azure/multiapi/storage/v2018_11_09/blob/blockblobservice.py [0:0]
def create_blob_from_stream(
self, container_name, blob_name, stream, count=None,
content_settings=None, metadata=None, validate_content=False,
progress_callback=None, max_connections=2, lease_id=None,
if_modified_since=None, if_unmodified_since=None, if_match=None,
if_none_match=None, timeout=None, use_byte_buffer=False):
'''
Creates a new blob from a file/stream, or updates the content of
an existing blob, with automatic chunking and progress
notifications.
:param str container_name:
Name of existing container.
:param str blob_name:
Name of blob to create or update.
:param io.IOBase stream:
Opened file/stream to upload as the blob content.
:param int count:
Number of bytes to read from the stream. This is optional, but
should be supplied for optimal performance.
:param ~azure.storage.blob.models.ContentSettings content_settings:
ContentSettings object used to set blob properties.
:param metadata:
Name-value pairs associated with the blob as metadata.
:type metadata: dict(str, str)
:param bool validate_content:
If true, calculates an MD5 hash for each chunk of the blob. The storage
service checks the hash of the content that has arrived against the hash
that was sent. This is primarily valuable for detecting bitflips on
the wire if using http instead of https, as https (the default) already
validates. Note that this MD5 hash is not stored with the blob. Also note
that if enabled, the memory-efficient upload algorithm will not be used,
because computing the MD5 hash requires buffering entire blocks, which
defeats the purpose of the memory-efficient algorithm.
:param progress_callback:
Callback for progress with signature function(current, total) where
current is the number of bytes transferred so far, and total is the
size of the blob, or None if the total size is unknown.
:type progress_callback: func(current, total)
:param int max_connections:
Maximum number of parallel connections to use when the blob size exceeds
64 MB. Note that parallel upload requires the stream to be seekable.
:param str lease_id:
Required if the blob has an active lease.
:param datetime if_modified_since:
A DateTime value. Azure expects the date value passed in to be UTC.
If timezone is included, any non-UTC datetimes will be converted to UTC.
If a date is passed in without timezone info, it is assumed to be UTC.
Specify this header to perform the operation only
if the resource has been modified since the specified time.
:param datetime if_unmodified_since:
A DateTime value. Azure expects the date value passed in to be UTC.
If timezone is included, any non-UTC datetimes will be converted to UTC.
If a date is passed in without timezone info, it is assumed to be UTC.
Specify this header to perform the operation only if
the resource has not been modified since the specified date/time.
:param str if_match:
An ETag value, or the wildcard character (*). Specify this header to perform
the operation only if the resource's ETag matches the value specified.
:param str if_none_match:
An ETag value, or the wildcard character (*). Specify this header
to perform the operation only if the resource's ETag does not match
the value specified. Specify the wildcard character (*) to perform
the operation only if the resource does not exist, and fail the
operation if it does exist.
:param int timeout:
The timeout parameter is expressed in seconds. This method may make
multiple calls to the Azure service and the timeout will apply to
each call individually.
:param bool use_byte_buffer:
If True, this will force usage of the original full block buffering upload path.
By default, this value is False and will employ a memory-efficient,
streaming upload algorithm under the following conditions:
The provided stream is seekable, 'require_encryption' is False, and
MAX_BLOCK_SIZE >= MIN_LARGE_BLOCK_UPLOAD_THRESHOLD.
One should consider the drawbacks of using this approach. To achieve
memory-efficiency, an IOBase stream or file-like object is segmented into
logical blocks using a SubStream wrapper. To read the correct data, each
SubStream must acquire a lock so that it can safely seek to the right
position on the shared, underlying stream. If max_connections > 1, this
concurrency results in a considerable amount of seeking on the underlying
stream. For the most common inputs, such as a file-like stream object,
seeking is an inexpensive operation and this is not much of a concern.
However, for other kinds of streams this may not be the case. The memory
savings must be weighed against the cost of seeking with your input stream.
The SubStream class will attempt to buffer up to 4 MB internally to reduce
the number of seek and read calls to the underlying stream. This is
particularly beneficial when uploading larger blocks.
:return: ETag and last modified properties for the Block Blob
:rtype: :class:`~azure.storage.blob.models.ResourceProperties`
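Example (a minimal usage sketch, not part of the service API; the account
credentials, container/blob names, and file path are placeholders, and the
import assumes BlockBlobService is re-exported from the versioned blob
package as in azure-storage-blob)::

    from azure.multiapi.storage.v2018_11_09.blob import BlockBlobService

    service = BlockBlobService(account_name='myaccount', account_key='mykey')
    with open('large_file.bin', 'rb') as data:
        props = service.create_blob_from_stream(
            'mycontainer', 'myblob', data, max_connections=4)
    # props is a ResourceProperties instance
    print(props.etag, props.last_modified)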
'''
_validate_not_none('container_name', container_name)
_validate_not_none('blob_name', blob_name)
_validate_not_none('stream', stream)
_validate_encryption_required(self.require_encryption, self.key_encryption_key)
# Adjust count to include padding if we are expected to encrypt.
adjusted_count = count
if (self.key_encryption_key is not None) and (adjusted_count is not None):
adjusted_count += (16 - (count % 16))
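# Padding to the 16-byte AES block size (PKCS7 style): e.g. count == 100 gives
# adjusted_count == 112 (100 % 16 == 4, so 12 bytes of padding); a count that is
# already a multiple of 16 still gains a full extra 16-byte block.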
# Do single put if the size is smaller than MAX_SINGLE_PUT_SIZE
if adjusted_count is not None and (adjusted_count < self.MAX_SINGLE_PUT_SIZE):
if progress_callback:
progress_callback(0, count)
data = stream.read(count)
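# The whole payload is now buffered in memory and sent in a single Put Blob request below.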
resp = self._put_blob(
container_name=container_name,
blob_name=blob_name,
blob=data,
content_settings=content_settings,
metadata=metadata,
validate_content=validate_content,
lease_id=lease_id,
if_modified_since=if_modified_since,
if_unmodified_since=if_unmodified_since,
if_match=if_match,
if_none_match=if_none_match,
timeout=timeout)
if progress_callback:
progress_callback(count, count)
return resp
else: # Size is larger than MAX_SINGLE_PUT_SIZE, must upload with multiple put_block calls
cek, iv, encryption_data = None, None, None
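# Fall back to the original full-buffering upload path when any precondition for the
# memory-efficient substream upload is not met: it was explicitly requested, per-chunk
# MD5 validation or client-side encryption is required, the block size is below the
# large-upload threshold, or the stream is not seekable (or lacks seek/tell).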
use_original_upload_path = use_byte_buffer or validate_content or self.require_encryption or \
self.MAX_BLOCK_SIZE < self.MIN_LARGE_BLOCK_UPLOAD_THRESHOLD or \
(hasattr(stream, 'seekable') and not stream.seekable()) or \
not hasattr(stream, 'seek') or not hasattr(stream, 'tell')
if use_original_upload_path:
if self.key_encryption_key:
cek, iv, encryption_data = _generate_blob_encryption_data(self.key_encryption_key)
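# Original path: each block is fully buffered in memory before upload, which is what
# makes per-chunk MD5 validation and client-side encryption possible.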
block_ids = _upload_blob_chunks(
blob_service=self,
container_name=container_name,
blob_name=blob_name,
blob_size=count,
block_size=self.MAX_BLOCK_SIZE,
stream=stream,
max_connections=max_connections,
progress_callback=progress_callback,
validate_content=validate_content,
lease_id=lease_id,
uploader_class=_BlockBlobChunkUploader,
timeout=timeout,
content_encryption_key=cek,
initialization_vector=iv
)
else:
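# Memory-efficient path: SubStream wrappers present logical block-sized views over the
# shared stream, so blocks are read lazily (and concurrently when max_connections > 1)
# rather than being buffered up front.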
block_ids = _upload_blob_substream_blocks(
blob_service=self,
container_name=container_name,
blob_name=blob_name,
blob_size=count,
block_size=self.MAX_BLOCK_SIZE,
stream=stream,
max_connections=max_connections,
progress_callback=progress_callback,
validate_content=validate_content,
lease_id=lease_id,
uploader_class=_BlockBlobChunkUploader,
timeout=timeout,
)
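# Commit the uploaded blocks; the new blob content only becomes visible once the
# block list has been put successfully.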
return self._put_block_list(
container_name=container_name,
blob_name=blob_name,
block_list=block_ids,
content_settings=content_settings,
metadata=metadata,
validate_content=validate_content,
lease_id=lease_id,
if_modified_since=if_modified_since,
if_unmodified_since=if_unmodified_since,
if_match=if_match,
if_none_match=if_none_match,
timeout=timeout,
encryption_data=encryption_data
)