in antlir/fs_utils.py [0:0]
def open_for_read_decompress(path: Path):
    """Yield a binary read handle for `path`, decompressing it on the fly.

    Acts like `open(path, "rb")`, except that a path ending in `.zst` is
    piped through `zstd`, and one ending in `.gz` or `.tgz` is piped through
    `gzip`.  Any other path is opened directly, with no child process.

    This is a generator that yields exactly once.
    NOTE(review): presumably wrapped by `contextlib.contextmanager` via a
    decorator outside this excerpt -- confirm.

    Yields:
        The file object from `path.open(mode="rb")` for plain files, or the
        child's `stdout` pipe when a decompressor is used.

    After the caller is done with a decompressed stream, the child's exit
    status is passed to `check_popen_returncode` (presumably raising on
    failure -- confirm against its definition), unless the child was killed
    by SIGPIPE, which is only logged.
    """
    path = Path(path)

    # Suffix -> external decompressor.  Probed in order; first match wins.
    suffix_to_program = (
        (b".zst", "zstd"),
        (b".gz", "gzip"),
        (b".tgz", "gzip"),
    )
    for suffix, program in suffix_to_program:
        if path.endswith(suffix):
            decompress = program
            break
    else:
        # No known compression suffix: hand out the raw file and stop here.
        with path.open(mode="rb") as f:
            yield f
        return

    argv = [decompress, "--decompress", "--stdout", path]
    # Leaving the `with` closes the pipe and waits for the child, so
    # `proc.returncode` is populated by the time it is inspected below.
    with subprocess.Popen(argv, stdout=subprocess.PIPE) as proc:
        yield proc.stdout

    # A caller that stops reading before EOF leaves the decompressor writing
    # into a closed pipe, so it may die from SIGPIPE.  That is tolerated
    # rather than treated as a failure.
    #
    # Why not a mandatory `must_read_all_data` boolean argument instead?
    # It was considered and rejected:
    #   - It would be artificial in the no-compression case.
    #   - Python file context managers do not normally behave that way; if
    #     wanted at all, it belongs in a wrapper, not in this API.
    #   - The added API complexity is of dubious utility.
    if proc.returncode != -signal.SIGPIPE:
        check_popen_returncode(proc)
    else:
        log.error(f"Ignoring SIGPIPE exit of child `{decompress}` for `{path}`")