in causalml/inference/tree/uplift.pyx [0:0]
def arr_evaluate_CIT(np.ndarray[P_TYPE_t, ndim=1] cur_node_summary_p,
np.ndarray[N_TYPE_t, ndim=1] cur_node_summary_n,
np.ndarray[P_TYPE_t, ndim=1] left_node_summary_p,
np.ndarray[N_TYPE_t, ndim=1] left_node_summary_n,
np.ndarray[P_TYPE_t, ndim=1] right_node_summary_p,
np.ndarray[N_TYPE_t, ndim=1] right_node_summary_n):
'''
Calculate likelihood ratio test statistic as split evaluation criterion for a given node
NOTE: n_class should be 2.
Args
----
cur_node_summary_p : array of shape [n_class]
Has type numpy.double.
The positive probabilities of each of the control
and treatment groups of the current node, i.e. [P(Y=1|T=i)...]
cur_node_summary_n : array of shape [n_class]
Has type numpy.int32.
The counts of each of the control
and treatment groups of the current node, i.e. [N(T=i)...]
left_node_summary_p : array of shape [n_class]
Has type numpy.double.
The positive probabilities of each of the control
and treatment groups of the left node, i.e. [P(Y=1|T=i)...]
left_node_summary_n : array of shape [n_class]
Has type numpy.int32.
The counts of each of the control
and treatment groups of the left node, i.e. [N(T=i)...]
right_node_summary_p : array of shape [n_class]
Has type numpy.double.
The positive probabilities of each of the control
and treatment groups of the right node, i.e. [P(Y=1|T=i)...]
right_node_summary_n : array of shape [n_class]
Has type numpy.int32.
The counts of each of the control
and treatment groups of the right node, i.e. [N(T=i)...]
Returns
-------
lrt : Likelihood ratio test statistic
'''
cdef P_TYPE_t lrt = 0.0
# since we take the log of these counts below, store them as doubles
# Control sample size left & right child node
cdef P_TYPE_t n_l_t_0 = left_node_summary_n[0]
cdef P_TYPE_t n_r_t_0 = right_node_summary_n[0]
# Treatment sample size left & right child node
cdef P_TYPE_t n_l_t_1 = left_node_summary_n[1]
cdef P_TYPE_t n_r_t_1 = right_node_summary_n[1]
# Total size of left & right node
cdef P_TYPE_t n_l_t = n_l_t_1 + n_l_t_0
cdef P_TYPE_t n_r_t = n_r_t_1 + n_r_t_0
# Total size of parent node
cdef P_TYPE_t n_t = n_l_t + n_r_t
# Total treatment & control size in parent node
cdef P_TYPE_t n_t_1 = n_l_t_1 + n_r_t_1
cdef P_TYPE_t n_t_0 = n_l_t_0 + n_r_t_0
# NOTE: the original code for sse_tau_l and sse_tau_r does not seem to follow the paper.
# sse = \sum_{i for treatment} (y_i - p_treatment)^2 + \sum_{i for control} (y_i - p_control)^2
# NOTE: since for classification, the y is either 0 or 1, we can calculate sse more simply
# for y in {0, 1}, sse = n*p*(1-p), but here it needs to be calculated separately for the treatment and control groups.
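# (Derivation: with p = mean(y) over n binary outcomes, the n*p ones each contribute (1-p)^2
#  and the n*(1-p) zeros each contribute p^2, so sse = n*p*(1-p)^2 + n*(1-p)*p^2 = n*p*(1-p).)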
# Sum of squared errors (sse) of the left child node
cdef P_TYPE_t sse_tau_l = n_l_t_0 * left_node_summary_p[0] * (1.0 - left_node_summary_p[0]) + n_l_t_1 * left_node_summary_p[1] * (1.0 - left_node_summary_p[1])
# Sum of squared errors (sse) of the right child node
cdef P_TYPE_t sse_tau_r = n_r_t_0 * right_node_summary_p[0] * (1.0 - right_node_summary_p[0]) + n_r_t_1 * right_node_summary_p[1] * (1.0 - right_node_summary_p[1])
# Sum of squared errors (sse) of the parent node
cdef P_TYPE_t sse_tau = n_t_0 * cur_node_summary_p[0] * (1.0 - cur_node_summary_p[0]) + n_t_1 * cur_node_summary_p[1] * (1.0 - cur_node_summary_p[1])
# Maximized log-likelihood function
cdef P_TYPE_t i_tau_l = - (n_l_t / 2.0) * log(n_l_t * sse_tau_l) + n_l_t_1 * log(n_l_t_1) + n_l_t_0 * log(n_l_t_0)
cdef P_TYPE_t i_tau_r = - (n_r_t / 2.0) * log(n_r_t * sse_tau_r) + n_r_t_1 * log(n_r_t_1) + n_r_t_0 * log(n_r_t_0)
cdef P_TYPE_t i_tau = - (n_t / 2.0) * log(n_t * sse_tau) + n_t_1 * log(n_t_1) + n_t_0 * log(n_t_0)
# Likelihood ratio test statistic
lrt = 2 * (i_tau_l + i_tau_r - i_tau)
return lrt
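# --- A hedged, pure-NumPy sketch (not part of uplift.pyx; `evaluate_cit_reference` is a
# hypothetical helper) that mirrors the computation above, e.g. for sanity-checking the
# Cython implementation outside a compiled extension. Arrays follow the same convention as
# the function arguments (index 0 = control, index 1 = treatment), and cur_n is assumed to
# equal the sum of the child counts, as in the function above.
import numpy as np

def evaluate_cit_reference(cur_p, cur_n, left_p, left_n, right_p, right_n):
    """Plain-NumPy re-implementation of the CIT likelihood ratio statistic, for verification only."""
    def max_log_likelihood(p, n):
        p = np.asarray(p, dtype=float)
        n = np.asarray(n, dtype=float)
        # Per-group sse for binary outcomes is n * p * (1 - p); sum over control and treatment.
        sse = float(np.sum(n * p * (1.0 - p)))
        n_total = float(np.sum(n))
        # Same expression as i_tau_l / i_tau_r / i_tau above.
        return -(n_total / 2.0) * np.log(n_total * sse) + float(np.sum(n * np.log(n)))

    i_left = max_log_likelihood(left_p, left_n)
    i_right = max_log_likelihood(right_p, right_n)
    i_parent = max_log_likelihood(cur_p, cur_n)
    return 2.0 * (i_left + i_right - i_parent)

# Example with made-up summaries (control first, then treatment):
# lrt = evaluate_cit_reference(cur_p=[0.30, 0.40], cur_n=[200, 200],
#                              left_p=[0.25, 0.45], left_n=[100, 100],
#                              right_p=[0.35, 0.35], right_n=[100, 100])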