def open_remote_file()

in cc_net/jsonql.py [0:0]


def open_remote_file(url: str, cache: Path = None) -> Iterable[str]:
    """Download the files at the given url to memory and opens it as a file.
    Assumes that the file is small, and fetch it when this function is called.
    """
    if cache and cache.exists():
        return open_read(cache)

    # TODO: open the remote file in streaming mode.
    # The hard part is that we need to write the content on disk at the same time,
    # to implement disk caching.
    raw_bytes = request_get_content(url)
    content = io.BytesIO(raw_bytes)
    if url.endswith(".gz"):
        f: TextIO = gzip.open(content, mode="rt")  # type: ignore
    else:
        f = io.TextIOWrapper(content)

    if cache and not cache.exists():
        # The file might have been created while downloading/writing.
        tmp_cache = _tmp(cache)
        tmp_cache.write_bytes(raw_bytes)
        if not cache.exists():
            tmp_cache.replace(cache)
        else:
            tmp_cache.unlink()

    return _close_when_exhausted(f)