in mmf/utils/download.py [0:0]
def download(url, path, fname, redownload=True, disable_tqdm=False):
    """
    Download file using `requests`.

    Data is streamed into ``<path>/<fname>.part`` and only moved to
    ``<path>/<fname>`` once the transfer has completed. If a ``.part`` file is
    already present, a ``Range`` request is sent so the transfer resumes from
    the current size of that file.

    Connection errors and read timeouts are retried up to five times, sleeping
    1, 2, 4, 8 and 16 seconds between consecutive attempts.

    If ``redownload`` is set to false, then will not download the file again if
    it is present (default ``True``).

    :param url: address of the file to fetch.
    :param path: directory the file is stored in.
    :param fname: name of the file inside ``path``.
    :param redownload: download even when the target file already exists.
    :param disable_tqdm: suppress the progress bar and the "Downloading" message.

    :return: whether download actually happened or not.

    :raises RuntimeWarning: if every attempt ended in a connection error or
        read timeout, or if fewer bytes were received than announced by the
        ``Content-Length`` header.
    """
    outfile = os.path.join(path, fname)
    # Kept under a separate name so the function's own name is not shadowed.
    should_download = not PathManager.isfile(outfile) or redownload
    if not should_download:
        return should_download

    # First test if the link is actually downloadable
    # NOTE(review): check_header is defined elsewhere; presumably it raises
    # when the URL cannot be downloaded -- confirm against its definition.
    check_header(url)
    if not disable_tqdm:
        print("[ Downloading: " + url + " to " + outfile + " ]")
    pbar = tqdm.tqdm(
        unit="B", unit_scale=True, desc=f"Downloading {fname}", disable=disable_tqdm
    )

    resume_file = outfile + ".part"
    retry = 5
    # Indexed by the number of retries left, so the wait grows as retries run out.
    exp_backoff = [2 ** r for r in reversed(range(retry))]
    chunk_size = 32768

    try:
        while retry >= 0:
            resume = PathManager.isfile(resume_file)
            if resume:
                resume_pos = os.path.getsize(resume_file)
                mode = "ab"
            else:
                resume_pos = 0
                mode = "wb"
            response = None

            with requests.Session() as session:
                try:
                    header = (
                        {
                            "Range": "bytes=%d-" % resume_pos,
                            "Accept-Encoding": "identity",
                        }
                        if resume
                        else {}
                    )
                    response = session.get(
                        url, stream=True, timeout=5, headers=header
                    )

                    # negative reply could be 'none' or just missing
                    # NOTE(review): checking for a 206 status would be a more
                    # direct test that the Range request was honoured.
                    if (
                        resume
                        and response.headers.get("Accept-Ranges", "none") == "none"
                    ):
                        resume_pos = 0
                        mode = "wb"

                    total_size = int(response.headers.get("Content-Length", -1))
                    # server returns remaining size if resuming, so adjust total
                    total_size += resume_pos
                    pbar.total = total_size
                    done = resume_pos

                    with PathManager.open(resume_file, mode) as f:
                        for chunk in response.iter_content(chunk_size):
                            if chunk:  # filter out keep-alive new chunks
                                f.write(chunk)
                            if total_size > 0:
                                done += len(chunk)
                                if total_size < done:
                                    # don't freak out if content-length was
                                    # too small
                                    total_size = done
                                    pbar.total = total_size
                                pbar.update(len(chunk))
                    # Whole body consumed without a connection error.
                    break
                except (
                    requests.exceptions.ConnectionError,
                    requests.exceptions.ReadTimeout,
                ):
                    retry -= 1
                    pbar.clear()
                    if retry >= 0:
                        print("Connection error, retrying. (%d retries left)" % retry)
                        time.sleep(exp_backoff[retry])
                    else:
                        print("Retried too many times, stopped retrying.")
                finally:
                    if response:
                        response.close()

        if retry < 0:
            raise RuntimeWarning(
                "Connection broken too many times. Stopped retrying."
            )

        # Reaching this point means an attempt succeeded, including the very
        # last one (retry == 0), so the file must always be finalised here.
        pbar.update(done - pbar.n)
        if done < total_size:
            raise RuntimeWarning(
                "Received less data than specified in "
                + "Content-Length header for "
                + url
                + ". There may be a download problem."
            )
        move(resume_file, outfile)
    finally:
        # Close unconditionally: the truthiness of a tqdm bar depends on its
        # ``total``, so ``if pbar:`` would skip closing when the size is unknown.
        pbar.close()

    return should_download