def download_with_retry()

in collection/download_wayback_passages.py [0:0]


import logging
import random
import time

import requests
from requests.exceptions import HTTPError


def download_with_retry(url: str, max_retries: int = 10) -> requests.Response:
    """Download a URL with exponential backoff, until max_retries is reached."""
    retry_num = 0
    while True:
        try:
            response = requests.get(url)
            response.raise_for_status()
            return response
        except HTTPError as e:
            status_code = e.response.status_code
            if not (status_code == 429 or status_code >= 500):
                # Not a transient failure (only 429 and 5xx are retried), so
                # re-raise immediately.
                raise

            if retry_num >= max_retries:
                logging.error(
                    f'Failed to perform GET request on {url} after {max_retries} retries.'
                )
                raise

            # Exponential backoff plus up to one second of random jitter;
            # rate-limited (429) requests wait an extra five seconds.
            if status_code == 429:
                time.sleep(5 + 2 ** retry_num + random.randint(0, 1000) / 1000)
            else:
                time.sleep(2 ** retry_num + random.randint(0, 1000) / 1000)
            retry_num += 1
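
For context, a minimal usage sketch; the snapshot URL and the output file name below are illustrative assumptions, not values taken from the script:

# Fetch a page (here a hypothetical Wayback Machine snapshot URL) and write
# the body to disk; 429 and 5xx responses are absorbed by the retry loop.
response = download_with_retry(
    'https://web.archive.org/web/20200101000000/https://example.com/',
    max_retries=5,
)
with open('snapshot.html', 'wb') as f:
    f.write(response.content)

The random jitter added to each sleep spreads retries from concurrent callers over time, so they do not all hit the server again at the same instant.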