in collection/download_wayback_passages.py [0:0]
def download_with_retry(url: str, max_retries: int = 10, timeout=None) -> requests.Response:
    """Download a URL with exponential backoff, until max_retries is reached.

    Retries only on HTTP 429 (rate limited) and 5xx (server error) responses;
    any other HTTP error is re-raised immediately. Each retry sleeps
    2**retry_num seconds plus up to one second of random jitter, with an
    extra flat 5 seconds for 429 responses to back off from rate limiting.

    Args:
        url: URL to fetch with a GET request.
        max_retries: Maximum number of retries before giving up.
        timeout: Optional per-request timeout in seconds, forwarded to
            requests.get. None (the default) preserves the previous behavior
            of waiting indefinitely for a response.

    Returns:
        The successful requests.Response.

    Raises:
        requests.HTTPError: If the status code is not retryable, or after
            max_retries failed retries.
    """
    retry_num = 0
    while True:
        try:
            response = requests.get(url, timeout=timeout)
            response.raise_for_status()
            return response
        except HTTPError as e:
            status_code = e.response.status_code
            if not (status_code == 429 or status_code >= 500):
                # Client errors other than 429 will not succeed on retry.
                raise
            if retry_num >= max_retries:
                # NOTE: fixed off-by-one — the original tested
                # `retry_num > max_retries`, which performed
                # max_retries + 1 retries before raising, contradicting
                # the logged message.
                logging.error(
                    f'Failed to perform GET request on {url} after {max_retries} retries.'
                )
                raise
            # Exponential backoff with up-to-1s jitter; rate-limited (429)
            # responses get an extra flat 5 seconds of courtesy delay.
            backoff = 2 ** retry_num + random.randint(0, 1000) / 1000
            if status_code == 429:
                backoff += 5
            time.sleep(backoff)
            retry_num += 1