in cc_net/jsonql.py [0:0]
def open_remote_file(url: str, cache: Path = None) -> Iterable[str]:
    """Fetch `url` fully into memory and expose its content as a text stream.

    The download happens eagerly, at call time, and the whole payload is held
    in memory, so this is only suitable for small files.

    Args:
        url: location of the remote file. A url ending in ".gz" is opened
            through `gzip` in text mode; anything else is wrapped as plain text.
        cache: optional path of an on-disk copy. When it already exists, it is
            opened with `open_read` and nothing is downloaded. Otherwise the
            raw downloaded bytes (still compressed for ".gz" urls) are saved
            there.

    Returns:
        The result of `open_read(cache)` on a cache hit, otherwise the text
        stream wrapped by `_close_when_exhausted` (presumably closing the
        stream once it has been fully iterated -- confirm against the helper).
    """
    cache_hit = cache and cache.exists()
    if cache_hit:
        return open_read(cache)

    # TODO: open the remote file in streaming mode.
    # The difficulty is that the content must be written to disk at the same
    # time for the disk cache to keep working.
    payload = request_get_content(url)
    buffer = io.BytesIO(payload)
    text_stream: TextIO = (
        gzip.open(buffer, mode="rt")  # type: ignore
        if url.endswith(".gz")
        else io.TextIOWrapper(buffer)
    )

    if cache and not cache.exists():
        # Write to a temporary path first, then move it into place, so the
        # final cache path is never written to directly.
        staging = _tmp(cache)
        staging.write_bytes(payload)
        # Someone else may have produced the cache file while we were
        # downloading/writing; in that case keep theirs and drop ours.
        if cache.exists():
            staging.unlink()
        else:
            staging.replace(cache)

    return _close_when_exhausted(text_stream)