# in datasets/loans.py [0:0]
def project_feats(self, feats=None):
    """
    Return the one-hot encoding of the data restricted to ``feats``.

    Every selected column of ``self.df`` is turned into bin indices with
    ``np.digitize`` against the edges ``0, 1, ..., size`` (``right=True``),
    where ``size`` is that column's entry in ``self.domain``. A value ``v``
    with ``k - 1 < v <= k`` lands in bin ``k``, so integer codes
    ``0 .. size - 1`` map to themselves and any value ``<= 0`` lands in
    bin 0. Each bin index then selects a row of a ``size x size`` identity
    matrix, and the per-column blocks are stacked side by side.

    feats: iterable of keys of ``self.domain`` to keep, in output order.
        When None, the keys obtained by iterating ``self.domain`` are used.

    Returns a float numpy matrix with one row per entry of the selected
    columns and ``sum(size)`` columns overall.

    Raises ValueError when no feature is selected, and IndexError when a
    column holds a value greater than ``size - 1`` (its bin index falls
    outside the identity matrix).
    """
    if feats is None:
        feats = self.domain
    feats_domain = {key: self.domain[key] for key in feats}
    if not feats_domain:
        # np.hstack([]) would fail with an opaque "need at least one array"
        # message; raise the same exception type with a clear cause instead.
        raise ValueError("project_feats needs at least one feature to encode")
    # get binning of attributes: pair each domain size with the bin index
    # of every value in the corresponding column
    bins_size_array = [
        (size_bin, np.digitize(self.df[col], range(size_bin + 1), right=True))
        for col, size_bin in feats_domain.items()
    ]
    # perform one-hot-encoding of all features and stack them into a numpy
    # matrix; indexing the identity matrix by bin index yields one-hot rows
    bin_dataset = np.hstack(
        [np.eye(size_bin)[bin_array] for size_bin, bin_array in bins_size_array]
    )
    return bin_dataset