def tardata()

in awsio/python/lib/io/s3/s3dataset.py [0:0]


def tardata(fileobj, skip_meta=r"__[^/]*__($|/)", handler=reraise_exception):
    """Iterate over a tar archive, yielding ``(filename, content)`` pairs.

    Args:
        fileobj: Bytes-like object holding the complete tar archive. It is
            wrapped in ``io.BytesIO`` before being handed to ``tarfile``, so
            despite its name it is not an already-open file object.
        skip_meta: Regular expression applied with ``re.match`` (anchored at
            the start of the member name); members whose name matches are
            skipped. Pass ``None`` to disable this filter.
        handler: Callable invoked with any ``Exception`` raised while opening
            or reading the archive. For a failure on an individual member, a
            truthy return value moves on to the next member and a falsy one
            stops the iteration. For a failure outside the per-member
            handling (e.g. opening the archive), the return value is ignored.

    Yields:
        ``(fname, data)`` where ``fname`` is the member name reported by
        ``tarfile`` and ``data`` is the full content read from that member.
        Only regular files are yielded.
    """
    # eliminated from test coverage since checking requires invalid tarfile
    try:
        # The context manager closes the tar stream on every exit path:
        # normal exhaustion, ``break``, an exception, or the consumer
        # abandoning the generator part-way through.
        with tarfile.open(fileobj=io.BytesIO(fileobj), mode="r|*") as stream:
            for tarinfo in stream:
                try:
                    if not tarinfo.isreg():  # pragma: no cover
                        continue
                    fname = tarinfo.name
                    if fname is None:  # pragma: no cover
                        continue
                    # meta_prefix / meta_suffix come from module scope;
                    # top-level members (no "/") bracketed by them are
                    # treated as metadata.
                    if ("/" not in fname and fname.startswith(meta_prefix)
                            and fname.endswith(meta_suffix)):  # pragma: no cover
                        # skipping metadata for now
                        continue
                    if skip_meta is not None and re.match(skip_meta, fname):  # pragma: no cover
                        continue
                    data = stream.extractfile(tarinfo).read()
                    yield fname, data
                except Exception as exn:  # pragma: no cover
                    # The handler decides whether a bad member is skipped
                    # (truthy) or ends the iteration (falsy).
                    if not handler(exn):
                        break
    except Exception as exn:  # pragma: no cover
        handler(exn)