# url_utils.py

import os
import re
import urllib.request

# get_temp_folder(), DownloadProgressBar, ai4e_utils_temp_dir, and max_path_len
# are defined elsewhere in this module.

def download_url(url, destination_filename=None, progress_updater=None,
                 force_download=False):
"""
Download a URL to a file. If no file is specified, creates a temporary file,
with a semi-best-effort to avoid filename collisions.
Prints some diagnostic information and makes sure to omit SAS tokens from printouts.
progress_updater can be "None", "True", or a specific callback.
"""
    # Allow progress_updater to be passed as a bool: True selects the default
    # progress bar, False disables progress reporting.
    if isinstance(progress_updater, bool):
        progress_updater = DownloadProgressBar() if progress_updater else None
    url_no_sas = url.split('?')[0]

    if destination_filename is None:
        target_folder = get_temp_folder()
        # This does not guarantee uniqueness, hence "semi-best-effort": URLs that
        # differ only in their query parameters map to the same filename.
        url_as_filename = re.sub(r'\W+', '', url_no_sas)
        n_folder_chars = len(ai4e_utils_temp_dir)
        if len(url_as_filename) + n_folder_chars > max_path_len:
            print('Warning: truncating filename target to {} characters'.format(
                max_path_len))
            url_as_filename = url_as_filename[-1 * (max_path_len - n_folder_chars):]
        destination_filename = os.path.join(target_folder, url_as_filename)
    if (not force_download) and (os.path.isfile(destination_filename)):
        print('Bypassing download of already-downloaded file {}'.format(
            os.path.basename(url_no_sas)))
    else:
        print('Downloading file {} to {}'.format(
            os.path.basename(url_no_sas), destination_filename), end='')
        urllib.request.urlretrieve(url, destination_filename, progress_updater)
        assert os.path.isfile(destination_filename)
        n_bytes = os.path.getsize(destination_filename)
        print('...done, {} bytes.'.format(n_bytes))

    return destination_filename
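

# The block below is a minimal usage sketch, not part of the original module.
# It assumes the module-level defaults described above (temp-folder destination,
# default progress bar); the URL is hypothetical, and simple_reporthook is a
# hypothetical callback with the (block_num, block_size, total_size) signature
# that urllib.request.urlretrieve passes to its reporthook.

def simple_reporthook(block_num, block_size, total_size):
    """Print cumulative bytes downloaded; usable as a progress_updater."""
    downloaded = block_num * block_size
    if total_size > 0:
        downloaded = min(downloaded, total_size)
    print('\rDownloaded {} bytes'.format(downloaded), end='')


if __name__ == '__main__':

    # Hypothetical URL; anything after '?' (e.g. a SAS token) is omitted from printouts.
    sample_url = 'https://example.com/data/sample.zip?sv=FAKE_SAS_TOKEN'

    # Download to an auto-generated temporary filename with the default progress bar...
    path_1 = download_url(sample_url, progress_updater=True)

    # ...or to an explicit destination with a custom progress callback.
    path_2 = download_url(sample_url, destination_filename='sample_copy.zip',
                          progress_updater=simple_reporthook, force_download=True)

    print('Downloaded to {} and {}'.format(path_1, path_2))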