in src/clients/s3_client.py [0:0]
def download_file_from_presigned_url(self, presigned_url, headers=None) -> Tuple[str, map, S3_STATUS_CODES]:
    """
    Download a file from an S3 presigned URL.

    The AWS Python SDK has no API for downloading via a presigned URL, so this
    issues a plain HTTP GET. Presigned URLs do not always surface S3 errors in
    the HTTP status code, so each response body is inspected for an embedded
    XML error and retried when the error is retryable.

    Args:
        presigned_url: The S3 presigned GET URL to download from.
        headers: Optional request headers; sanitized via
            ``_filter_request_headers`` before being sent.

    Returns:
        Tuple of (UTF-8 decoded file content, response headers, status code
        enum). NOTE(review): the ``map`` in the annotation is likely meant to
        be a ``Mapping`` type — confirm and fix when the import block is in view.

    Raises:
        S3DownloadException: on a non-retryable S3 error, or once retries are
            exhausted.
        FileSizeLimitExceededException: when the Content-Length header exceeds
            ``max_file_supported``.
        UnsupportedFileException: when the response body is not valid UTF-8.
    """
    parsed_headers = self._filter_request_headers(presigned_url, headers)
    for attempt in range(self.S3_DOWNLOAD_MAX_RETRIES):
        start_time = time.time()
        LOG.debug(f"Downloading object with presigned url {presigned_url} and headers: {parsed_headers}")
        response = self.session.get(presigned_url, timeout=self.MAX_GET_TIMEOUT, headers=parsed_headers)
        end_time = time.time()
        try:
            # Presigned URLs can return a misleading HTTP status on failure;
            # the XML body must be parsed to find the real error code/status.
            error_detected, (error_code, error_message, response_status_code) = self._contains_error(response)
            if error_detected:
                status_code_enum, error_code_enum = error_code_to_enums(error_code)
                LOG.error(f"Error downloading file from presigned url. ({error_code}: {error_message})")
                # Enum names encode the numeric HTTP status in their last 3 characters.
                status_code = int(status_code_enum.name[-3:])
                if status_code not in self.S3_RETRY_STATUS_CODES or attempt == self.S3_DOWNLOAD_MAX_RETRIES - 1:
                    LOG.error("Client error or max retries reached for downloading file from presigned url.")
                    self.download_metrics.add_fault_count()
                    raise S3DownloadException(error_code, error_message)
            else:
                # Enforce the size limit BEFORE decoding so oversized payloads
                # are rejected without paying for a full UTF-8 decode.
                if CONTENT_LENGTH in response.headers and int(response.headers.get(CONTENT_LENGTH)) > self.max_file_supported:
                    raise FileSizeLimitExceededException("File too large to process")
                text_content = response.content.decode('utf-8')
                self.download_metrics.add_latency(start_time, end_time)
                return text_content, response.headers, response_status_code
            # Retryable error: back off before the next attempt.
            # NOTE(review): `attempt ** BACKOFF_FACTOR` is polynomial backoff;
            # exponential backoff would be `BACKOFF_FACTOR ** attempt` — confirm intent.
            time.sleep(max(1.0, attempt ** self.BACKOFF_FACTOR))
        except UnicodeDecodeError:
            raise UnsupportedFileException(response.content, response.headers, "Not a valid utf-8 file")
    # Defensive: unreachable when S3_DOWNLOAD_MAX_RETRIES >= 1 (the final
    # attempt always returns or raises), but guards against a misconfigured
    # retry count of 0 silently returning None.
    raise S3DownloadException("InternalError", "Download retries exhausted without a response")