in simulation/decai/simulation/data/imdb_data_loader.py [0:0]
def load_data(self, train_size: int = None, test_size: int = None) -> (tuple, tuple):
    """
    Load the IMDB review data and encode each review as a multi-hot vector.

    The raw samples come from `imdb.load_data(num_words=self.num_words)`.
    Each sample is treated as a sequence of integer word indices; it is turned
    into a row of length `self.num_words` holding a 1 at every index that
    occurs in the sample and 0 elsewhere (occurrence counts are not kept).

    :param train_size: If not `None`, only the first `train_size` training
        samples (and their labels) are kept.
    :param test_size: If not `None`, only the first `test_size` test
        samples (and their labels) are kept.
    :return: `(x_train, y_train), (x_test, y_test)` where each `x_*` is an
        integer array of shape `(number of samples, self.num_words)` and each
        `y_*` is the (possibly truncated) label sequence returned by
        `imdb.load_data`.
    """
    self._logger.info("Loading IMDB review data using %d words.", self.num_words)
    (x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=self.num_words)
    if train_size is not None:
        x_train, y_train = x_train[:train_size], y_train[:train_size]
    if test_size is not None:
        x_test, y_test = x_test[:test_size], y_test[:test_size]

    def get_features(data):
        """Build the multi-hot matrix for `data`, a sequence of word-index sequences."""
        result = np.zeros((len(data), self.num_words), dtype='int')
        for i, x in enumerate(data):
            # Set every column named in this sample in one vectorized assignment
            # instead of one Python-level assignment per token. Repeated indices
            # are harmless since the same value is written each time.
            # Converting with an explicit integer dtype keeps empty samples valid.
            # NOTE(review): assumes every index is < self.num_words, which the
            # `num_words` argument passed to the loader above is expected to
            # guarantee - otherwise this raises IndexError, as the per-token loop did.
            result[i, np.asarray(x, dtype=np.intp)] = 1
        return result

    x_train = get_features(x_train)
    x_test = get_features(x_test)
    self._logger.info("Done loading IMDB review data.")
    return (x_train, y_train), (x_test, y_test)