in cc_net/jsonql.py [0:0]
def request_get_content(url: str, n_retry: int = 3) -> bytes:
    """Retrieve the binary content at url.

    Retry on request errors, sleeping ``10 * 2 ** i`` seconds after the
    i-th failed attempt. Errors whose message contains "Client Error"
    (e.g. a 404) are re-raised immediately, without retrying.

    Args:
        url: Address to download.
        n_retry: Maximum number of attempts. Values below 1 are treated
            as 1 so that at least one download is always attempted.

    Returns:
        The body of the response, as bytes.

    Raises:
        requests.exceptions.RequestException: On a client error, or when
            the last allowed attempt fails.
    """
    # Always try at least once; otherwise `r` would be unbound below.
    n_attempts = max(1, n_retry)
    t0 = time.time()
    logging.info(f"Starting download of {url}")
    for i in range(1, n_attempts + 1):
        try:
            r = _session().get(url)
            r.raise_for_status()
            break
        except requests.exceptions.RequestException as e:
            # Sleep and try again on error, unless it's a client error
            # (such as a 404), where retrying is not expected to help.
            # `e.args` can be empty, so guard before indexing into it.
            message = e.args[0] if e.args and isinstance(e.args[0], str) else ""
            if i == n_attempts or "Client Error" in message:
                raise
            warnings.warn(
                f"Swallowed error {e} while downloading {url} ({i} out of {n_retry})"
            )
            # Exponential backoff: 20s, 40s, 80s, ...
            time.sleep(10 * 2 ** i)

    content = r.content
    dl_time = time.time() - t0
    # Clamp the elapsed time so a zero (or negative, after a clock
    # adjustment) duration cannot break the speed computation.
    dl_speed = len(content) / max(dl_time, 1e-9) / 1024
    logging.info(
        f"Downloaded {url} [{r.status_code}] took {dl_time:.0f}s ({dl_speed:.1f}kB/s)"
    )
    return content