in causalml/inference/tree/uplift.pyx [0:0]
def arr_evaluate_CIT(np.ndarray[P_TYPE_t, ndim=1] cur_node_summary_p,
np.ndarray[N_TYPE_t, ndim=1] cur_node_summary_n,
np.ndarray[P_TYPE_t, ndim=1] left_node_summary_p,
np.ndarray[N_TYPE_t, ndim=1] left_node_summary_n,
np.ndarray[P_TYPE_t, ndim=1] right_node_summary_p,
np.ndarray[N_TYPE_t, ndim=1] right_node_summary_n):
'''
Calculate likelihood ratio test statistic as split evaluation criterion for a given node
NOTE: n_class should be 2.
Args
----
cur_node_summary_p : array of shape [n_class]
Has type numpy.double.
The positive probabilities of each of the control
and treatment groups of the current node, i.e. [P(Y=1|T=i)...]
cur_node_summary_n : array of shape [n_class]
Has type numpy.int32.
The counts of each of the control
and treatment groups of the current node, i.e. [N(T=i)...]
left_node_summary_p : array of shape [n_class]
Has type numpy.double.
The positive probabilities of each of the control
and treatment groups of the left node, i.e. [P(Y=1|T=i)...]
left_node_summary_n : array of shape [n_class]
Has type numpy.int32.
The counts of each of the control
and treatment groups of the left node, i.e. [N(T=i)...]
right_node_summary_p : array of shape [n_class]
Has type numpy.double.
The positive probabilities of each of the control
and treatment groups of the right node, i.e. [P(Y=1|T=i)...]
right_node_summary_n : array of shape [n_class]
Has type numpy.int32.
The counts of each of the control
and treatment groups of the right node, i.e. [N(T=i)...]
Returns
-------
lrt : Likelihood ratio test statistic
'''
cdef P_TYPE_t lrt = 0.0
# since we take the log of these counts below, store them as doubles
# Control sample size left & right child node
cdef P_TYPE_t n_l_t_0 = left_node_summary_n[0]
cdef P_TYPE_t n_r_t_0 = right_node_summary_n[0]
# Treatment sample size left & right child node
cdef P_TYPE_t n_l_t_1 = left_node_summary_n[1]
cdef P_TYPE_t n_r_t_1 = right_node_summary_n[1]
# Total size of left & right node
cdef P_TYPE_t n_l_t = n_l_t_1 + n_l_t_0
cdef P_TYPE_t n_r_t = n_r_t_1 + n_r_t_0
# Total size of parent node
cdef P_TYPE_t n_t = n_l_t + n_r_t
# Total treatment & control size in parent node
cdef P_TYPE_t n_t_1 = n_l_t_1 + n_r_t_1
cdef P_TYPE_t n_t_0 = n_l_t_0 + n_r_t_0
# NOTE: the original code for sse_tau_l and sse_tau_r does not seem to follow the paper.
# sse = \sum_{i for treatment} (y_i - p_treatment)^2 + \sum_{i for control} (y_i - p_control)^2
# NOTE: since for classification, the y is either 0 or 1, we can calculate sse more simply
# for y in {0, 1}, sse = n*p*(1-p), but here it needs to be calculated separately for the treatment and control groups.
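# (Derivation: with p = mean(y) over n binary outcomes, the n*p ones each contribute (1-p)^2
#  and the n*(1-p) zeros each contribute p^2, so sse = n*p*(1-p)^2 + n*(1-p)*p^2 = n*p*(1-p).)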
# Sum of squared errors (sse) of the left child node
cdef P_TYPE_t sse_tau_l = n_l_t_0 * left_node_summary_p[0] * (1.0 - left_node_summary_p[0]) + n_l_t_1 * left_node_summary_p[1] * (1.0 - left_node_summary_p[1])
# Sum of squared errors (sse) of the right child node
cdef P_TYPE_t sse_tau_r = n_r_t_0 * right_node_summary_p[0] * (1.0 - right_node_summary_p[0]) + n_r_t_1 * right_node_summary_p[1] * (1.0 - right_node_summary_p[1])
# Sum of squared errors (sse) of the parent node
cdef P_TYPE_t sse_tau = n_t_0 * cur_node_summary_p[0] * (1.0 - cur_node_summary_p[0]) + n_t_1 * cur_node_summary_p[1] * (1.0 - cur_node_summary_p[1])
# Maximized log-likelihood function
cdef P_TYPE_t i_tau_l = - (n_l_t / 2.0) * log(n_l_t * sse_tau_l) + n_l_t_1 * log(n_l_t_1) + n_l_t_0 * log(n_l_t_0)
cdef P_TYPE_t i_tau_r = - (n_r_t / 2.0) * log(n_r_t * sse_tau_r) + n_r_t_1 * log(n_r_t_1) + n_r_t_0 * log(n_r_t_0)
cdef P_TYPE_t i_tau = - (n_t / 2.0) * log(n_t * sse_tau) + n_t_1 * log(n_t_1) + n_t_0 * log(n_t_0)
# Likelihood ratio test statistic
lrt = 2 * (i_tau_l + i_tau_r - i_tau)
return lrt
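# --- A hedged, pure-NumPy sketch (not part of uplift.pyx; `evaluate_cit_reference` is a
# hypothetical helper) that mirrors the computation above, e.g. for sanity-checking the
# Cython implementation outside a compiled extension. Arrays follow the same convention as
# the function arguments (index 0 = control, index 1 = treatment), and cur_n is assumed to
# equal the sum of the child counts, as in the function above.
import numpy as np

def evaluate_cit_reference(cur_p, cur_n, left_p, left_n, right_p, right_n):
    """Plain-NumPy re-implementation of the CIT likelihood ratio statistic, for verification only."""
    def max_log_likelihood(p, n):
        p = np.asarray(p, dtype=float)
        n = np.asarray(n, dtype=float)
        # Per-group sse for binary outcomes is n * p * (1 - p); sum over control and treatment.
        sse = float(np.sum(n * p * (1.0 - p)))
        n_total = float(np.sum(n))
        # Same expression as i_tau_l / i_tau_r / i_tau above.
        return -(n_total / 2.0) * np.log(n_total * sse) + float(np.sum(n * np.log(n)))

    i_left = max_log_likelihood(left_p, left_n)
    i_right = max_log_likelihood(right_p, right_n)
    i_parent = max_log_likelihood(cur_p, cur_n)
    return 2.0 * (i_left + i_right - i_parent)

# Example with made-up summaries (control first, then treatment):
# lrt = evaluate_cit_reference(cur_p=[0.30, 0.40], cur_n=[200, 200],
#                              left_p=[0.25, 0.45], left_n=[100, 100],
#                              right_p=[0.35, 0.35], right_n=[100, 100])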