in causalml/inference/tree/uplift.pyx [0:0]
def arr_evaluate_IT(np.ndarray[P_TYPE_t, ndim=1] left_node_summary_p,
                    np.ndarray[N_TYPE_t, ndim=1] left_node_summary_n,
                    np.ndarray[P_TYPE_t, ndim=1] right_node_summary_p,
                    np.ndarray[N_TYPE_t, ndim=1] right_node_summary_n):
    '''
    Score a candidate split with a squared t-statistic.

    The statistic compares the treatment-minus-control difference in the
    left child with the same difference in the right child, scaled by a
    pooled standard deviation built from the four (child, group) cells.

    NOTE: only entries 0 (control) and 1 (treatment) of each array are
    read, so n_class should be 2.

    NOTE: no checks are made for zero counts or a zero pooled variance;
    what the divisions below produce in those cases depends on how the
    module is compiled.

    Args
    ----
    left_node_summary_p : array of shape [n_class]
        Has type numpy.double.
        The positive probabilities of each of the control
        and treatment groups of the left node, i.e. [P(Y=1|T=i)...]
    left_node_summary_n : array of shape [n_class]
        Has type numpy.int32.
        The counts of each of the control
        and treatment groups of the left node, i.e. [N(T=i)...]
    right_node_summary_p : array of shape [n_class]
        Has type numpy.double.
        The positive probabilities of each of the control
        and treatment groups of the right node, i.e. [P(Y=1|T=i)...]
    right_node_summary_n : array of shape [n_class]
        Has type numpy.int32.
        The counts of each of the control
        and treatment groups of the right node, i.e. [N(T=i)...]

    Returns
    -------
    g_s : Squared T-Statistic
    '''
    cdef:
        P_TYPE_t mean_left_ctrl, mean_right_ctrl
        P_TYPE_t mean_left_trt, mean_right_trt
        N_TYPE_t cnt_left_ctrl, cnt_right_ctrl
        N_TYPE_t cnt_left_trt, cnt_right_trt
        P_TYPE_t var_left_ctrl, var_right_ctrl
        P_TYPE_t var_left_trt, var_right_trt
        P_TYPE_t dof_total
        P_TYPE_t wt_left_trt, wt_right_trt, wt_left_ctrl, wt_right_ctrl
        P_TYPE_t pooled_sd, effect_gap, std_err, t_stat

    # Control group (index 0): sample mean and sample size per child
    mean_left_ctrl = left_node_summary_p[0]
    mean_right_ctrl = right_node_summary_p[0]
    cnt_left_ctrl = left_node_summary_n[0]
    cnt_right_ctrl = right_node_summary_n[0]

    # Treatment group (index 1): there is a single treatment contrasted
    # with control, so no loop over treatments is needed
    mean_left_trt = left_node_summary_p[1]
    mean_right_trt = right_node_summary_p[1]
    cnt_left_trt = left_node_summary_n[1]
    cnt_right_trt = right_node_summary_n[1]

    # Per-cell variance of the outcome, p * (1 - p) for a Bernoulli variable
    var_left_ctrl = mean_left_ctrl * (1 - mean_left_ctrl)
    var_right_ctrl = mean_right_ctrl * (1 - mean_right_ctrl)
    var_left_trt = mean_left_trt * (1 - mean_left_trt)
    var_right_trt = mean_right_trt * (1 - mean_right_trt)

    # Each cell is weighted by its (n - 1) share of the total
    dof_total = ((cnt_left_trt - 1) + (cnt_right_trt - 1)
                 + (cnt_left_ctrl - 1) + (cnt_right_ctrl - 1))
    wt_left_trt = (cnt_left_trt - 1) / dof_total
    wt_right_trt = (cnt_right_trt - 1) / dof_total
    wt_left_ctrl = (cnt_left_ctrl - 1) / dof_total
    wt_right_ctrl = (cnt_right_ctrl - 1) / dof_total

    # Pooled estimator of the (assumed constant) standard deviation
    pooled_sd = sqrt(wt_left_trt * var_left_trt
                     + wt_right_trt * var_right_trt
                     + wt_left_ctrl * var_left_ctrl
                     + wt_right_ctrl * var_right_ctrl)

    # Left-child effect minus right-child effect, over its standard error
    effect_gap = (mean_left_trt - mean_left_ctrl) - (mean_right_trt - mean_right_ctrl)
    std_err = pooled_sd * sqrt(1.0 / cnt_left_trt + 1.0 / cnt_right_trt
                               + 1.0 / cnt_left_ctrl + 1.0 / cnt_right_ctrl)
    t_stat = effect_gap / std_err

    # Squared t-statistic
    return t_stat * t_stat