in torchdata/datapipes/iter/util/ziparchivereader.py [0:0]
def __iter__(self) -> Iterator[Tuple[str, BufferedIOBase]]:
    """Yield a ``(path, stream)`` pair for every file member of each incoming zip archive.

    Every item produced by ``self.datapipe`` is checked with
    ``validate_pathname_binary_tuple`` and unpacked as ``(pathname, data_stream)``.
    ``data_stream`` is opened as a zip archive, directory entries are skipped,
    and each remaining member is opened and yielded.

    Yields:
        A tuple of the normalized join of the archive's ``pathname`` with the
        member's file name, and the opened member wrapped in ``StreamWrapper``.

    Raises:
        Exception: anything raised while opening or walking the archive is
            reported through ``warnings.warn`` and then re-raised unchanged.

    Note:
        Neither the ``ZipFile`` object nor ``data_stream`` is closed here.
        NOTE(review): presumably because the yielded member streams are still
        read by the consumer after this generator advances -- confirm.
    """
    for data in self.datapipe:
        validate_pathname_binary_tuple(data)
        pathname, data_stream = data
        try:
            # typing.cast is used here to silence mypy's type checker
            zips = zipfile.ZipFile(cast(IO[bytes], data_stream))
            for zipinfo in zips.infolist():
                # ZipInfo.is_dir() exists from Python 3.6 on. Compare the whole
                # version tuple so the major version is part of the check
                # instead of being assumed.
                if sys.version_info >= (3, 6):
                    if zipinfo.is_dir():
                        continue
                elif zipinfo.filename.endswith("/"):
                    # Fallback for older interpreters: skip entries whose
                    # name ends with a slash.
                    continue
                extracted_fobj = zips.open(zipinfo)
                inner_pathname = os.path.normpath(os.path.join(pathname, zipinfo.filename))
                yield inner_pathname, StreamWrapper(extracted_fobj)  # type: ignore[misc]
        except Exception as e:
            warnings.warn(f"Unable to extract files from corrupted zipfile stream {pathname} due to: {e}, abort!")
            # Bare raise re-raises the active exception as-is.
            raise