def compute_categorical_features_dict()

in bindings/python-compute/feature_differentiation.py [0:0]


    def compute_categorical_features_dict(self):
        """Group categorical feature names by their parent feature name.

        If ``self.categorical_features`` is ``None`` it is first inferred from
        ``self.feature_df`` and stored back on the instance: a column counts
        as categorical when it has fewer than 7 unique values or its dtype is
        ``object``.

        Each categorical feature name is split on ``DUMMY_PREFIX_SEP``; the
        part before the first separator is used as the parent name, so names
        sharing a prefix (e.g. columns that are already one-hot encoded) end
        up in the same list. A name without the separator is its own parent.

        Returns:
            dict: parent feature name -> list of the categorical feature
            names carrying that prefix, in the order they appear in
            ``self.categorical_features``.
        """
        if self.categorical_features is None:
            # Treat a column as categorical when it is non-numeric (object
            # dtype) or has only a small number of distinct values.
            self.categorical_features = [
                c for c in self.feature_df.columns
                if len(self.feature_df[c].unique()) < 7
                or self.feature_df.dtypes[c] == 'object'
            ]

        cat_dict = {}
        for c in self.categorical_features:
            # The text before the first separator is the parent feature name.
            parent = c.split(DUMMY_PREFIX_SEP)[0]
            # setdefault creates the list on first sight of a parent; this
            # replaces a bare ``except:`` that was used as a missing-key test
            # and would have swallowed unrelated exceptions too.
            cat_dict.setdefault(parent, []).append(c)
        return cat_dict