utils/data_utils.py

import pickle

import numpy as np

# (CIFAR_10_DIR, CIFAR_100_DIR and _cifar_maybe_download_and_extract are
# assumed to be defined elsewhere in this module.)

def _get_cifar(data_dir, is_cifar_100):
    """
    Get the CIFAR-10 or CIFAR-100 dataset
    Args:
        data_dir        Directory where the downloaded data will be stored
        is_cifar_100    If True, load CIFAR-100; otherwise load CIFAR-10
    """
    x_train = None
    y_train = None
    x_validation = None
    y_validation = None
    x_test = None
    y_test = None
    l = None  # Placeholder stored as the third entry of every split
    # Download the dataset if needed
    _cifar_maybe_download_and_extract(data_dir)

    # Dictionary to store the dataset
    dataset = dict()
    dataset['train'] = []
    dataset['validation'] = []
    dataset['test'] = []
    def dense_to_one_hot(labels_dense, num_classes=100):
        num_labels = labels_dense.shape[0]
        index_offset = np.arange(num_labels) * num_classes
        labels_one_hot = np.zeros((num_labels, num_classes))
        labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1
        return labels_one_hot
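
    # For example, dense_to_one_hot(np.array([2, 0]), num_classes=3) would
    # return [[0., 0., 1.], [1., 0., 0.]] (an illustrative call, not used below).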
    if is_cifar_100:
        # Load the training data of CIFAR-100
        # (the CIFAR archives were pickled under Python 2, so Python 3 needs
        # an explicit encoding to decode the keys and byte arrays)
        with open(data_dir + CIFAR_100_DIR + '/train', 'rb') as f:
            datadict = pickle.load(f, encoding='latin1')
        _X = datadict['data']
        _Y = np.array(datadict['fine_labels'])
        _Y = dense_to_one_hot(_Y, num_classes=100)
        _X = np.array(_X, dtype=float) / 255.0
        _X = _X.reshape([-1, 3, 32, 32])
        _X = _X.transpose([0, 2, 3, 1])

        # Compute the data mean for normalization
        x_train_mean = np.mean(_X, axis=0)

        # First 40000 images for training, the remaining 10000 for validation
        x_train = _X[:40000]
        y_train = _Y[:40000]
        x_validation = _X[40000:]
        y_validation = _Y[40000:]
    else:
        # Load all five training batches of CIFAR-10
        for i in range(5):
            with open(data_dir + CIFAR_10_DIR + '/data_batch_' + str(i + 1), 'rb') as f:
                datadict = pickle.load(f, encoding='latin1')
            _X = datadict['data']
            _Y = np.array(datadict['labels'])
            _Y = dense_to_one_hot(_Y, num_classes=10)
            _X = np.array(_X, dtype=float) / 255.0
            _X = _X.reshape([-1, 3, 32, 32])
            _X = _X.transpose([0, 2, 3, 1])
            if x_train is None:
                x_train = _X
                y_train = _Y
            else:
                x_train = np.concatenate((x_train, _X), axis=0)
                y_train = np.concatenate((y_train, _Y), axis=0)

        # Compute the data mean for normalization
        x_train_mean = np.mean(x_train, axis=0)

        # We don't use a validation set with CIFAR-10; slice the same range
        # for images and labels (the original mixed [:40000] and [40000:]),
        # and copy so the in-place normalization below doesn't subtract the
        # mean from these rows twice.
        x_validation = x_train[40000:].copy()
        y_validation = y_train[40000:].copy()
    # Normalize the train and validation sets
    x_train -= x_train_mean
    x_validation -= x_train_mean

    dataset['train'].append(x_train)
    dataset['train'].append(y_train)
    dataset['train'].append(l)

    dataset['validation'].append(x_validation)
    dataset['validation'].append(y_validation)
    dataset['validation'].append(l)
    if is_cifar_100:
        # Load the test batch of CIFAR-100
        with open(data_dir + CIFAR_100_DIR + '/test', 'rb') as f:
            datadict = pickle.load(f, encoding='latin1')
        _X = datadict['data']
        _Y = np.array(datadict['fine_labels'])
        _Y = dense_to_one_hot(_Y, num_classes=100)
    else:
        # Load the test batch of CIFAR-10
        with open(data_dir + CIFAR_10_DIR + '/test_batch', 'rb') as f:
            datadict = pickle.load(f, encoding='latin1')
        _X = datadict['data']
        _Y = np.array(datadict['labels'])
        _Y = dense_to_one_hot(_Y, num_classes=10)

    _X = np.array(_X, dtype=float) / 255.0
    _X = _X.reshape([-1, 3, 32, 32])
    _X = _X.transpose([0, 2, 3, 1])

    x_test = _X
    y_test = _Y
    # Normalize the test set with the training mean
    x_test -= x_train_mean

    dataset['test'].append(x_test)
    dataset['test'].append(y_test)
    dataset['test'].append(l)

    return dataset
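
# ---------------------------------------------------------------------------
# Hypothetical usage sketch (not part of the original module): it assumes a
# data directory named 'CIFAR_data' and shows the layout of the returned dict,
# where each split is a list of [images, one_hot_labels, None].
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    cifar10 = _get_cifar('CIFAR_data', is_cifar_100=False)

    x_train, y_train, _ = cifar10['train']
    x_test, y_test, _ = cifar10['test']

    print(x_train.shape)  # (50000, 32, 32, 3), mean-centered floats
    print(y_train.shape)  # (50000, 10), one-hot rows
    print(x_test.shape)   # (10000, 32, 32, 3)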