in bindings/python-compute/feature_differentiation.py [0:0]
def compute_categorical_features_dict(self):
    """Group categorical feature names by their parent feature.

    If ``self.categorical_features`` is ``None`` it is first inferred from
    ``self.feature_df`` and stored back on the instance: a column counts as
    categorical when it has fewer than 7 unique values or its dtype is
    ``object``.

    Each feature name is then split on ``DUMMY_PREFIX_SEP`` and the text
    before the first separator is used as the parent name, so features that
    are already one-hot encoded are collected under a single key.

    Returns:
        dict: parent feature name -> list of the categorical feature names
        sharing that parent, in the order they appear in
        ``self.categorical_features``.
    """
    if self.categorical_features is None:
        # Treat a column as categorical if its data type is non-numeric
        # (object) or it has only a small number of unique values.
        # NOTE: len(unique()) is kept on purpose; pandas' nunique() skips
        # NaN by default and could classify columns differently.
        self.categorical_features = [
            c for c in self.feature_df.columns
            if len(self.feature_df[c].unique()) < 7
            or self.feature_df.dtypes[c] == 'object'
        ]

    # Dict keys are categorical parent features; list elements are the
    # parent feature names suffixed with category names.
    cat_dict = {}
    for c in self.categorical_features:
        # In case the features are already one-hot encoded, only the part
        # before the first separator identifies the parent feature.
        parent = c.split(DUMMY_PREFIX_SEP, 1)[0]
        # setdefault creates the list on first sight of a parent without
        # relying on a bare ``except`` to detect the missing key.
        cat_dict.setdefault(parent, []).append(c)
    return cat_dict