def _GetNodeSummary()

in causalml/feature_selection/filters.py [0:0]
19 lines of code
6 McCabe index (conditional complexity)

    def _GetNodeSummary(self, data,
                        experiment_group_column='treatment_group_key',
                        y_name='conversion'):
        """
        Count outcomes and compute the conversion rate per treatment group.

        This function comes from the uplift tree algorithm, where it is used
        for tree node split evaluation.

        Parameters
        ----------
        data : DataFrame
            The DataFrame that contains all the data (in the current "node").
        experiment_group_column : str, optional
            Name of the column in ``data`` holding the treatment group key.
        y_name : str, optional
            Name of the column in ``data`` holding the outcome. Only the
            values 0 and 1 contribute to ``nodeSummary``.

        Returns
        -------
        results : dict
            Counts of conversions by treatment groups, of the form:
            {'control': {0: 10, 1: 8}, 'treatment1': {0: 5, 1: 15}}
            A (group, outcome) pair that never occurs in ``data`` is
            reported with a count of 0.
        nodeSummary : dict
            Probability of conversion and group size by treatment groups, of
            the form:
            {'control': [0.490, 500], 'treatment1': [0.584, 500]}
        """

        # Note: results and nodeSummary are both dict with treatment_group_key
        # as the key.  So we can compute the treatment effect and/or
        # divergence easily.

        # Counts of conversions by treatment group
        results_series = data.groupby([experiment_group_column, y_name]).size()

        treatment_group_keys = results_series.index.levels[0].tolist()
        y_name_keys = results_series.index.levels[1].tolist()

        # The grouped counts may lack a (group, outcome) pair that never
        # occurs (e.g. a group with no non-converters), while the cross
        # product of the index levels still contains it.  Look such pairs up
        # with a default of 0 instead of raising KeyError.
        results = {
            ti: {ci: results_series.get((ti, ci), 0) for ci in y_name_keys}
            for ti in treatment_group_keys
        }

        # Probability of conversion and group size by treatment group
        nodeSummary = {}
        for treatment_group_key, outcome_counts in results.items():
            # An outcome value missing from the whole node (all 0s or all 1s)
            # has no key at all in outcome_counts, so default it to 0 too.
            n_1 = outcome_counts.get(1, 0)
            n_total = n_1 + outcome_counts.get(0, 0)
            y_mean = 1.0 * n_1 / n_total
            nodeSummary[treatment_group_key] = [y_mean, n_total]

        return results, nodeSummary