in cc_net/jsonql.py [0:0]
def open_read(filename: ReadableFileLike) -> Iterable[str]:
    """Open the given file, list of files or files matching the given glob and read lines.

    `filename` is None or "-" -> reads from stdin
    `filename` is an "http://" / "https://" str -> delegated to `open_remote_file`
    `filename` is a Path / str -> interprets filename as a glob and opens the
        files matching it; if nothing matches, the path itself is opened as-is
    `filename` is a list -> an empty list yields no lines, a one-element list is
        treated like that single path, a longer list is handed to `_yield_from`
    `filename` is something else -> returns the object unmodified.
        This allows passing already opened files or iterables.

    A path whose name ends with "]" is handed to `block_reader`.
    `open_read` will decompress gzip files, given they have ".gz" suffix.
    """
    # "-" is the documented alias for stdin; only compare when it really is a
    # str so arbitrary iterables never go through an `==` comparison.
    if filename is None or (isinstance(filename, str) and filename == "-"):
        return sys.stdin

    if isinstance(filename, list):
        # Test emptiness BEFORE looking at filename[0]: indexing an empty list
        # raises IndexError, which previously made this branch unreachable.
        if not filename:
            return []
        assert isinstance(filename[0], Path)
        if len(filename) > 1:
            return _yield_from(filename)
        filename = tp.cast(Path, filename[0])

    if isinstance(filename, str):
        if filename.startswith(("http://", "https://")):
            return open_remote_file(filename)
        filename = Path(filename)

    if not isinstance(filename, Path):
        # we might have received an iterable, return it unmodified.
        return filename  # type: ignore

    # Expand glob patterns only when reading
    files = [Path(f) for f in sorted(glob.glob(str(filename)))]
    if len(files) > 1:
        return _yield_from(files)
    if len(files) == 1:
        filename = files[0]
    # With zero matches `filename` is left untouched, so the `open` call below
    # reports the missing file.

    assert isinstance(filename, Path)

    if filename.name.endswith("]"):
        return block_reader(filename)

    logging.getLogger(__name__).info(f"Opening {filename} with mode 'rt'")
    if filename.suffix == ".gz":
        file: TextIO = gzip.open(filename, "rt")  # type: ignore
    else:
        file = open(filename, "rt")

    # NOTE(review): `_close_when_exhausted` presumably closes the handle once
    # iteration finishes -- confirm against its definition.
    return _close_when_exhausted(file)