in src/datasets/utils/extract.py [0:0]
def is_extractable(cls, path: Union[Path, str], magic_number: bytes = b"") -> bool:
if super().is_extractable(path, magic_number=magic_number):
return True
try:
# Alternative version of zipfile.is_zipfile that has less false positives, but misses executable zip archives.
# From: https://github.com/python/cpython/pull/5053
from zipfile import (
_CD_SIGNATURE,
_ECD_DISK_NUMBER,
_ECD_DISK_START,
_ECD_ENTRIES_TOTAL,
_ECD_OFFSET,
_ECD_SIZE,
_EndRecData,
sizeCentralDir,
stringCentralDir,
structCentralDir,
)
with open(path, "rb") as fp:
endrec = _EndRecData(fp)
if endrec:
if endrec[_ECD_ENTRIES_TOTAL] == 0 and endrec[_ECD_SIZE] == 0 and endrec[_ECD_OFFSET] == 0:
return True # Empty zipfiles are still zipfiles
elif endrec[_ECD_DISK_NUMBER] == endrec[_ECD_DISK_START]:
fp.seek(endrec[_ECD_OFFSET]) # Central directory is on the same disk
if fp.tell() == endrec[_ECD_OFFSET] and endrec[_ECD_SIZE] >= sizeCentralDir:
data = fp.read(sizeCentralDir) # CD is where we expect it to be
if len(data) == sizeCentralDir:
centdir = struct.unpack(structCentralDir, data) # CD is the right size
if centdir[_CD_SIGNATURE] == stringCentralDir:
return True # First central directory entry has correct magic number
return False
except Exception: # catch all errors in case future python versions change the zipfile internals
return False