in dataloading.py [0:0]
def download(self):
    """Convert the raw MNIST1M IDX files into serialized torch files.

    Returns immediately when ``self._check_exists()`` is truthy
    (presumably meaning the processed files are already present --
    confirm against ``_check_exists``). Otherwise the image and label
    files under ``self.raw_folder`` are loaded, paired as
    ``(images, labels)`` tuples, and written with ``torch.save`` to
    ``self.training_file`` and ``self.test_file`` inside
    ``self.processed_folder``, which is created if it is missing.
    """
    if self._check_exists():
        # Nothing to do; skip the read/convert/write cycle entirely.
        return

    logging.info("Processing MNIST1M data...")
    os.makedirs(self.processed_folder, exist_ok=True)

    def _raw(name):
        # Location of one raw IDX file.
        return os.path.join(self.raw_folder, name)

    def _store(payload, filename):
        # Serialize one (images, labels) pair into the processed folder.
        target = os.path.join(self.processed_folder, filename)
        with open(target, "wb") as sink:
            torch.save(payload, sink)

    # Load every raw file before writing anything, so a missing or
    # unreadable raw file aborts before any processed file is created.
    train_images = read_image_file(_raw("mnist1m-images-idx3-ubyte"))
    train_labels = read_label_file(_raw("mnist1m-labels-idx1-ubyte"))
    test_images = read_image_file(_raw("t10k-images-idx3-ubyte"))
    test_labels = read_label_file(_raw("t10k-labels-idx1-ubyte"))

    _store((train_images, train_labels), self.training_file)
    _store((test_images, test_labels), self.test_file)

    logging.info("Done!")