in mozdownload/scraper.py [0:0]
def download(self):
    """Download the specified file.

    The content of ``self.url`` is streamed in chunks of ``CHUNK_SIZE`` into
    a temporary ``<filename>.part`` file, which is renamed to
    ``self.filename`` once the transfer has completed. If ``self.filename``
    already exists nothing is downloaded.

    Returns:
        The value of ``self.filename``.

    Raises:
        errors.NotFoundError: The server answered the request with HTTP 404.
        errors.TimeoutError: The transfer took ``self.timeout_download``
            seconds or longer (only checked when that attribute is truthy).

    Both exception types are handed to ``self._retry`` as
    ``retry_exceptions``; presumably the attempt is repeated for those
    before they reach the caller -- confirm against ``_retry``.
    """
    def total_seconds(td):
        # Keep backward compatibility with Python 2.6 which doesn't have
        # this method
        if hasattr(td, 'total_seconds'):
            return td.total_seconds()
        else:
            return (td.microseconds +
                    (td.seconds + td.days * 24 * 3600) * 10 ** 6) / 10 ** 6

    # Don't re-download the file
    if os.path.isfile(os.path.abspath(self.filename)):
        self.logger.info("File has already been downloaded: %s" %
                         (self.filename))
        return self.filename

    # A bare file name has an empty dirname, and os.makedirs('') raises, so
    # only create the target folder when there actually is one.
    directory = os.path.dirname(self.filename)
    if directory and not os.path.isdir(directory):
        os.makedirs(directory)

    self.logger.info('Downloading from: %s' % self.url)
    self.logger.info('Saving as: %s' % self.filename)

    tmp_file = self.filename + ".part"

    def _download():
        """Run a single download attempt into ``tmp_file``."""
        response = None
        try:
            start_time = datetime.now()

            # Enable streaming mode so we can download content in chunks
            response = self.session.get(self.url, stream=True)
            response.raise_for_status()

            content_length = response.headers.get('Content-length')
            log_level = self.logger.getEffectiveLevel()
            # The progress bar needs a known total size and is only shown
            # when INFO messages are enabled.
            show_progress = bool(log_level <= logging.INFO and
                                 content_length)

            max_value = 0
            if content_length:
                total_size = int(content_length.strip())
                # Round up to the next multiple of CHUNK_SIZE. Floor
                # division keeps the value an integer on Python 3 too.
                max_value = ((total_size // CHUNK_SIZE) + 1) * CHUNK_SIZE

            pbar = None
            if show_progress:
                widgets = [pb.Percentage(), ' ', pb.Bar(), ' ', pb.ETA(),
                           ' ', pb.FileTransferSpeed()]
                pbar = pb.ProgressBar(widgets=widgets,
                                      maxval=max_value).start()

            bytes_downloaded = 0
            with open(tmp_file, 'wb') as f:
                for chunk in response.iter_content(CHUNK_SIZE):
                    f.write(chunk)
                    # Count what was really received; chunks can be shorter
                    # than CHUNK_SIZE.
                    bytes_downloaded += len(chunk)

                    if pbar is not None:
                        # Never report more than maxval, the progress bar
                        # raises ValueError for out of range values.
                        pbar.update(min(bytes_downloaded, max_value))

                    elapsed = total_seconds(datetime.now() - start_time)
                    if self.timeout_download and \
                            elapsed >= self.timeout_download:
                        raise errors.TimeoutError

            if pbar is not None:
                pbar.finish()
        except Exception as ex:
            # Never leave a partial download behind
            if os.path.isfile(tmp_file):
                os.remove(tmp_file)
            if isinstance(ex, requests.exceptions.HTTPError) and \
                    ex.response is not None and \
                    ex.response.status_code == 404:
                raise errors.NotFoundError("The requested url was not found",
                                           self.url)
            else:
                raise
        finally:
            # A streamed response keeps its connection checked out until the
            # body is fully consumed or the response is closed explicitly.
            if response is not None:
                response.close()

    self._retry(_download,
                retry_exceptions=(errors.NotFoundError,
                                  errors.TimeoutError))

    os.rename(tmp_file, self.filename)

    return self.filename