utils/data_utils.py

import pickle

import numpy as np

# (CIFAR_10_DIR, CIFAR_100_DIR and _cifar_maybe_download_and_extract are
# assumed to be defined elsewhere in this module.)

def _get_cifar(data_dir, is_cifar_100):
    """
    Get the CIFAR-10 or CIFAR-100 dataset
    Args:
        data_dir        Directory where the downloaded data will be stored
        is_cifar_100    If True, load CIFAR-100; otherwise load CIFAR-10
    """
    x_train = None
    y_train = None
    x_validation = None
    y_validation = None
    x_test = None
    y_test = None
    l = None  # Placeholder stored as the third entry of every split
    # Download the dataset if needed
    _cifar_maybe_download_and_extract(data_dir)

    # Dictionary to store the dataset
    dataset = dict()
    dataset['train'] = []
    dataset['validation'] = []
    dataset['test'] = []
    def dense_to_one_hot(labels_dense, num_classes=100):
        num_labels = labels_dense.shape[0]
        index_offset = np.arange(num_labels) * num_classes
        labels_one_hot = np.zeros((num_labels, num_classes))
        labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1
        return labels_one_hot
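
    # For example, dense_to_one_hot(np.array([2, 0]), num_classes=3) would
    # return [[0., 0., 1.], [1., 0., 0.]] (an illustrative call, not used below).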
    if is_cifar_100:
        # Load the training data of CIFAR-100
        # (the CIFAR archives were pickled under Python 2, so Python 3 needs
        # an explicit encoding to decode the keys and byte arrays)
        with open(data_dir + CIFAR_100_DIR + '/train', 'rb') as f:
            datadict = pickle.load(f, encoding='latin1')
        _X = datadict['data']
        _Y = np.array(datadict['fine_labels'])
        _Y = dense_to_one_hot(_Y, num_classes=100)
        _X = np.array(_X, dtype=float) / 255.0
        _X = _X.reshape([-1, 3, 32, 32])
        _X = _X.transpose([0, 2, 3, 1])

        # Compute the data mean for normalization
        x_train_mean = np.mean(_X, axis=0)

        # First 40000 images for training, the remaining 10000 for validation
        x_train = _X[:40000]
        y_train = _Y[:40000]
        x_validation = _X[40000:]
        y_validation = _Y[40000:]
    else:
        # Load all five training batches of CIFAR-10
        for i in range(5):
            with open(data_dir + CIFAR_10_DIR + '/data_batch_' + str(i + 1), 'rb') as f:
                datadict = pickle.load(f, encoding='latin1')
            _X = datadict['data']
            _Y = np.array(datadict['labels'])
            _Y = dense_to_one_hot(_Y, num_classes=10)
            _X = np.array(_X, dtype=float) / 255.0
            _X = _X.reshape([-1, 3, 32, 32])
            _X = _X.transpose([0, 2, 3, 1])
            if x_train is None:
                x_train = _X
                y_train = _Y
            else:
                x_train = np.concatenate((x_train, _X), axis=0)
                y_train = np.concatenate((y_train, _Y), axis=0)

        # Compute the data mean for normalization
        x_train_mean = np.mean(x_train, axis=0)

        # We don't use a validation set with CIFAR-10; slice the same range
        # for images and labels (the original mixed [:40000] and [40000:]),
        # and copy so the in-place normalization below doesn't subtract the
        # mean from these rows twice.
        x_validation = x_train[40000:].copy()
        y_validation = y_train[40000:].copy()
    # Normalize the train and validation sets
    x_train -= x_train_mean
    x_validation -= x_train_mean

    dataset['train'].append(x_train)
    dataset['train'].append(y_train)
    dataset['train'].append(l)

    dataset['validation'].append(x_validation)
    dataset['validation'].append(y_validation)
    dataset['validation'].append(l)
    if is_cifar_100:
        # Load the test batch of CIFAR-100
        with open(data_dir + CIFAR_100_DIR + '/test', 'rb') as f:
            datadict = pickle.load(f, encoding='latin1')
        _X = datadict['data']
        _Y = np.array(datadict['fine_labels'])
        _Y = dense_to_one_hot(_Y, num_classes=100)
    else:
        # Load the test batch of CIFAR-10
        with open(data_dir + CIFAR_10_DIR + '/test_batch', 'rb') as f:
            datadict = pickle.load(f, encoding='latin1')
        _X = datadict['data']
        _Y = np.array(datadict['labels'])
        _Y = dense_to_one_hot(_Y, num_classes=10)

    _X = np.array(_X, dtype=float) / 255.0
    _X = _X.reshape([-1, 3, 32, 32])
    _X = _X.transpose([0, 2, 3, 1])

    x_test = _X
    y_test = _Y
    # Normalize the test set with the training mean
    x_test -= x_train_mean

    dataset['test'].append(x_test)
    dataset['test'].append(y_test)
    dataset['test'].append(l)

    return dataset
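
# ---------------------------------------------------------------------------
# Hypothetical usage sketch (not part of the original module): it assumes a
# data directory named 'CIFAR_data' and shows the layout of the returned dict,
# where each split is a list of [images, one_hot_labels, None].
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    cifar10 = _get_cifar('CIFAR_data', is_cifar_100=False)

    x_train, y_train, _ = cifar10['train']
    x_test, y_test, _ = cifar10['test']

    print(x_train.shape)  # (50000, 32, 32, 3), mean-centered floats
    print(y_train.shape)  # (50000, 10), one-hot rows
    print(x_test.shape)   # (10000, 32, 32, 3)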