in torchdata/datapipes/iter/util/hashchecker.py [0:0]
def __iter__(self) -> Iterator[Tuple[str, StreamWrapper]]:
for file_name, data in self.source_datapipe:
if self.hash_type == "sha256":
hash_func = hashlib.sha256()
else:
hash_func = hashlib.md5()
if isinstance(data, (str, bytes, bytearray)):
if isinstance(data, str):
data = data.decode()
hash_func.update(data)
# File Stream
else:
# Not all streams have `read(bytes)` method.
# `__iter__` method is chosen because it is a common interface for IOBase.
for d in data:
hash_func.update(d)
# TODO(133): this will not work (or work crappy for non-seekable steams like http)
if self.rewind:
data.seek(0)
if file_name not in self.hash_dict:
raise RuntimeError(f"Unspecified hash for file {file_name}")
if hash_func.hexdigest() != self.hash_dict[file_name]:
raise RuntimeError(
f"The computed hash {hash_func.hexdigest()} of {file_name} does not match the expected"
f"hash {self.hash_dict[file_name]}. Delete the file manually and retry."
)
if isinstance(data, (str, bytes, bytearray)):
yield file_name, data
else:
yield file_name, StreamWrapper(data)