in awsio/python/lib/io/s3/s3dataset.py [0:0]
def tardata(fileobj, skip_meta=r"__[^/]*__($|/)", handler=reraise_exception):
    """Iterate over a tar archive, yielding ``(filename, content)`` pairs.

    Only regular-file members are yielded; directories, links and other
    special members are skipped, as are metadata members (see below).

    Args:
        fileobj: Bytes-like object holding the complete tar archive. It is
            wrapped in ``io.BytesIO`` before being handed to ``tarfile``.
        skip_meta: Regular expression applied with ``re.match`` (i.e.
            anchored at the start) to every member name; matching members
            are skipped. Pass ``None`` to disable this filter.
        handler: Callable invoked with any exception raised while reading.
            For a per-member error, a truthy return value moves on to the
            next member and a falsy one stops the iteration. For an error
            outside the per-member handling (e.g. opening the archive) the
            return value is ignored and the generator simply ends. The
            default, ``reraise_exception``, is defined elsewhere in this
            module (presumably it re-raises -- confirm there).

    Yields:
        ``(fname, data)`` where ``fname`` is the member name as stored in
        the archive and ``data`` is the member's full content as ``bytes``.
    """
    # eliminated from test coverage since checking requires invalid tarfile
    try:
        # "r|*" opens the archive as a forward-only stream with transparent
        # compression detection. The ``with`` block guarantees the TarFile
        # is closed on every exit path: normal exhaustion, ``break``, an
        # exception re-raised by the handler, or the consumer closing the
        # generator early.
        with tarfile.open(fileobj=io.BytesIO(fileobj), mode="r|*") as stream:
            for tarinfo in stream:
                try:
                    if not tarinfo.isreg():  # pragma: no cover
                        continue
                    fname = tarinfo.name
                    if fname is None:  # pragma: no cover
                        continue
                    # Top-level members wrapped in the module's metadata
                    # prefix/suffix markers are not data samples.
                    if ("/" not in fname and fname.startswith(meta_prefix)
                            and fname.endswith(meta_suffix)):  # pragma: no cover
                        # skipping metadata for now
                        continue
                    if skip_meta is not None and re.match(skip_meta, fname):  # pragma: no cover
                        continue
                    data = stream.extractfile(tarinfo).read()
                    yield fname, data
                except Exception as exn:  # pragma: no cover
                    # The handler decides whether one bad member aborts the
                    # whole archive or is merely skipped.
                    if handler(exn):
                        continue
                    else:
                        break
    except Exception as exn:  # pragma: no cover
        handler(exn)