def prepare_data()

in mnist-vscode-docs-sample/utils.py [0:0]


def prepare_data(dataset, data_folder):
    """Download the MNIST archives and load them as train/test splits.

    The four gzip archives are stored in ``os.path.join(data_folder, dataset)``,
    which is created if it does not exist. Archives already present in that
    directory are reused instead of being downloaded again.

    Args:
        dataset: Name of the sub-directory (under ``data_folder``) that holds
            the archives, e.g. ``'mnist'``.
        data_folder: Root directory under which the dataset directory lives.

    Returns:
        A tuple ``(X_train, X_test, y_train, y_test)``. The image arrays are
        the result of ``load_data(..., False)`` divided by 255.0; the label
        arrays are the result of ``load_data(..., True)`` flattened with
        ``reshape(-1)``.
        NOTE(review): ``load_data`` is defined elsewhere in this file and
        presumably returns NumPy arrays -- confirm.

    Raises:
        OSError: If the directory cannot be created or a download fails
            (``urllib.error.URLError`` is an ``OSError`` subclass).
    """
    data_folder = os.path.join(data_folder, dataset)
    print('making data directory ' + data_folder + '...')
    os.makedirs(data_folder, exist_ok=True)

    def download_data(url, filename):
        # Skip the network round-trip when the archive is already on disk.
        if not os.path.isfile(filename):
            print('downloading ' + url)
            urllib.request.urlretrieve(url, filename=filename)
        else:
            print(filename + ' exists, using it')

    base_url = 'http://yann.lecun.com/exdb/mnist/'
    # (remote archive name, local file name) pairs for each split.
    training_files = (
        ('train-images-idx3-ubyte.gz', 'train-images.gz'),
        ('train-labels-idx1-ubyte.gz', 'train-labels.gz'),
    )
    testing_files = (
        ('t10k-images-idx3-ubyte.gz', 'test-images.gz'),
        ('t10k-labels-idx1-ubyte.gz', 'test-labels.gz'),
    )

    # Download into the same directory the loads below read from; the
    # previous hard-coded './data/mnist/...' targets only matched when the
    # caller passed data_folder='./data' and dataset='mnist'.
    print('downloading training data ...')
    for remote_name, local_name in training_files:
        download_data(base_url + remote_name, os.path.join(data_folder, local_name))
    print('done.')
    print('downloading testing data ...')
    for remote_name, local_name in testing_files:
        download_data(base_url + remote_name, os.path.join(data_folder, local_name))
    print('done.')

    print('Prepared training dataset is stored here:', data_folder)

    # Images are scaled by 1/255.0; labels are flattened to one dimension.
    X_train = load_data(os.path.join(data_folder, 'train-images.gz'), False) / 255.0
    X_test = load_data(os.path.join(data_folder, 'test-images.gz'), False) / 255.0

    y_train = load_data(os.path.join(data_folder, 'train-labels.gz'), True).reshape(-1)
    y_test = load_data(os.path.join(data_folder, 'test-labels.gz'), True).reshape(-1)

    print(X_train.shape, y_train.shape, X_test.shape, y_test.shape, sep='\n')

    return X_train, X_test, y_train, y_test