in mnist-vscode-docs-sample/utils.py [0:0]
def prepare_data(dataset, data_folder):
    """Download the MNIST archives if needed and load the train/test splits.

    The four gzip archives are stored in ``os.path.join(data_folder, dataset)``
    (created when missing) and then read back from that same directory with
    ``load_data``. Archives already present on disk are reused rather than
    downloaded again.

    Args:
        dataset: Name of the sub-directory, under ``data_folder``, that holds
            the archives.
        data_folder: Root directory for downloaded data.

    Returns:
        A tuple ``(X_train, X_test, y_train, y_test)``. The image arrays are
        divided by 255.0 and the label arrays are flattened with
        ``reshape(-1)``.
        NOTE(review): presumably NumPy arrays returned by ``load_data`` --
        confirm against that helper.
    """
    data_folder = os.path.join(data_folder, dataset)
    print('making data directory ' + data_folder + '...')
    os.makedirs(data_folder, exist_ok=True)

    def download_data(url, filename):
        """Fetch ``url`` into ``filename`` unless that file already exists."""
        if not os.path.isfile(filename):
            print('downloading ' + url)
            # Download to a temporary name and move it into place only after
            # the transfer finished, so an interrupted download never leaves
            # a truncated archive that later runs would mistake for valid.
            partial = filename + '.part'
            urllib.request.urlretrieve(url, filename=partial)
            os.replace(partial, filename)
        else:
            print(filename + ' exists, using it')

    # Build the archive paths from the directory that was just created, so
    # the files are downloaded to the same place they are loaded from
    # (previously the download targets were hard-coded to ./data/mnist).
    train_images = os.path.join(data_folder, 'train-images.gz')
    train_labels = os.path.join(data_folder, 'train-labels.gz')
    test_images = os.path.join(data_folder, 'test-images.gz')
    test_labels = os.path.join(data_folder, 'test-labels.gz')

    print('downloading training data ...')
    download_data('http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz', train_images)
    download_data('http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz', train_labels)
    print('done.')

    print('downloading testing data ...')
    download_data('http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz', test_images)
    download_data('http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz', test_labels)
    print('done.')

    print('Prepared training dataset is stored here:', data_folder)

    # The second argument of load_data is False for image archives and True
    # for label archives (presumably a "labels" flag -- verify in load_data).
    # Dividing by 255.0 rescales the image values.
    X_train = load_data(train_images, False) / 255.0
    X_test = load_data(test_images, False) / 255.0
    y_train = load_data(train_labels, True).reshape(-1)
    y_test = load_data(test_labels, True).reshape(-1)

    print(X_train.shape, y_train.shape, X_test.shape, y_test.shape, sep='\n')

    return X_train, X_test, y_train, y_test