examples/spark_dataset_converter/utils.py (16 lines of code) (raw):
import os
import tempfile
import requests
def download_mnist_libsvm(mnist_data_dir):
    """Download the MNIST dataset (LIBSVM format) into ``mnist_data_dir``.

    The bz2-compressed archive is fetched from the LIBSVM datasets site and
    written to ``<mnist_data_dir>/mnist.bz2``, overwriting any existing file
    of that name.

    :param mnist_data_dir: Path of an existing directory to store the
        archive in.
    :raises requests.exceptions.RequestException: If the connection fails,
        times out, or the server answers with an HTTP error status.
    :raises OSError: If the destination file cannot be written.
    """
    mnist_data_path = os.path.join(mnist_data_dir, "mnist.bz2")
    data_url = "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass/mnist.bz2"
    # (connect, read) timeouts: without them a stalled server would block
    # the caller forever. The read timeout applies between received bytes,
    # not to the total download time.
    r = requests.get(data_url, timeout=(10, 60))
    # Fail loudly on 4xx/5xx instead of saving an error page as "mnist.bz2".
    r.raise_for_status()
    with open(mnist_data_path, "wb") as f:
        f.write(r.content)
def get_mnist_dir():
    """Return a directory that contains the ``mnist.bz2`` archive.

    If ``/data/mnist/mnist.bz2`` already exists, ``/data/mnist/`` is returned
    as is. Otherwise a fresh temporary directory is created, the archive is
    downloaded into it via ``download_mnist_libsvm``, and the path of that
    temporary directory is returned. The caller owns the temporary directory
    and is responsible for deleting it.

    :return: Path of a directory holding ``mnist.bz2``.
    :raises Exception: Whatever ``download_mnist_libsvm`` raises; in that
        case the temporary directory is removed before re-raising.
    """
    # Imported locally so this function does not depend on a module-level
    # import of shutil.
    import shutil

    # This folder is baked into the docker image
    MNIST_DATA_DIR = "/data/mnist/"
    # isfile() on the joined path is only true when the directory exists too,
    # so a separate isdir() check is unnecessary.
    if os.path.isfile(os.path.join(MNIST_DATA_DIR, 'mnist.bz2')):
        return MNIST_DATA_DIR

    mnist_dir = tempfile.mkdtemp(suffix='_mnist_data')
    try:
        download_mnist_libsvm(mnist_dir)
    except BaseException:
        # Do not leave an empty/partial temp directory behind on failure.
        shutil.rmtree(mnist_dir, ignore_errors=True)
        raise
    return mnist_dir