in torchdata/datapipes/iter/util/tararchivereader.py [0:0]
def __iter__(self) -> Iterator[Tuple[str, BufferedIOBase]]:
for data in self.datapipe:
validate_pathname_binary_tuple(data)
pathname, data_stream = data
try:
# typing.cast is used here to silence mypy's type checker
tar = tarfile.open(fileobj=cast(Optional[IO[bytes]], data_stream), mode=self.mode)
for tarinfo in tar:
if not tarinfo.isfile():
continue
extracted_fobj = tar.extractfile(tarinfo)
if extracted_fobj is None:
warnings.warn(f"failed to extract file {tarinfo.name} from source tarfile {pathname}")
raise tarfile.ExtractError
inner_pathname = os.path.normpath(os.path.join(pathname, tarinfo.name))
yield inner_pathname, StreamWrapper(extracted_fobj) # type: ignore[misc]
except Exception as e:
warnings.warn(f"Unable to extract files from corrupted tarfile stream {pathname} due to: {e}, abort!")
raise e