in src/pipeline/prepare.py [0:0]
def convert_sparse_matrix(df, nb_rows, nb_customer, nb_products):
    """Build a one-hot sparse feature matrix and a rating label vector.

    The first three columns of ``df`` are read positionally: column 0 is
    used as the customer column index, column 1 as the product index
    (shifted right by ``nb_customer`` so it lands after the customer
    columns), and column 2 as the label.

    Args:
        df: DataFrame with at least three columns, read as described above.
        nb_rows: Number of rows of the output matrix; expected to equal the
            number of rows in ``df``.
        nb_customer: Number of customer columns in the feature matrix.
        nb_products: Number of product columns in the feature matrix.

    Returns:
        A tuple ``(X, Y)`` where ``X`` is a float32 ``lil_matrix`` of shape
        ``(nb_rows, nb_customer + nb_products)`` with two ones per row, and
        ``Y`` is a float32 array of shape ``(nb_rows,)``.

    Raises:
        AssertionError: If the produced shapes do not match the expected ones.
    """
    # dataframe to array
    values = df.values
    # determine feature size
    nb_cols = nb_customer + nb_products
    print("# of rows = {}".format(str(nb_rows)))
    print("# of cols = {}".format(str(nb_cols)))

    # ``astype`` always copies, so the caller's DataFrame is never modified
    # (``df.values`` may be a view on the frame's data, or read-only).
    # Casting to int also keeps the indices valid when ``df.values`` was
    # upcast to float because of a non-integer rating column.
    customer_cols = values[:, 0].astype(np.int64)
    product_cols = values[:, 1].astype(np.int64) + nb_customer

    # Features are one-hot encoded in a sparse matrix
    X = lil_matrix((nb_rows, nb_cols), dtype='float32')
    row_idx = np.arange(nb_rows)
    X[row_idx, customer_cols] = 1
    X[row_idx, product_cols] = 1

    # create label with ratings
    Y = values[:, 2].astype('float32')

    # validate size and shape
    print(X.shape)
    print(Y.shape)
    assert X.shape == (nb_rows, nb_cols)
    assert Y.shape == (nb_rows, )
    return X, Y