in causalml/inference/tree/uplift.pyx [0:0]
def evaluate_IT(leftNodeSummary, rightNodeSummary, w_l, w_r):
    '''
    Score a candidate split with the squared t-statistic of the
    treatment-by-split interaction.

    Entry 0 of each node summary is read as the control group and the
    remaining entries as treatment groups; within an entry, index 0 is used
    as the sample mean of the outcome and index 1 as the sample size.

    Args
    ----
    leftNodeSummary : list of list
        The left node summary statistics.
    rightNodeSummary : list of list
        The right node summary statistics.
    w_l: array-like, shape = [num_samples]
        An array containing the treatment for each unit in the left node.
        Not read by this function.
    w_r: array-like, shape = [num_samples]
        An array containing the treatment for each unit in the right node.
        Not read by this function.

    Returns
    -------
    g_s : Squared T-Statistic. It is 0 when the summaries contain no
        treatment entries beyond the control entry.
    '''
    # Control group: mean and size in the left and right child nodes.
    ctrl_mean_left = leftNodeSummary[0][0]
    ctrl_mean_right = rightNodeSummary[0][0]
    ctrl_size_left = leftNodeSummary[0][1]
    ctrl_size_right = rightNodeSummary[0][1]

    # Variance of the control outcome in each child, computed as p * (1 - p),
    # i.e. the variance of a Bernoulli variable with mean p.
    ctrl_var_left = ctrl_mean_left * (1 - ctrl_mean_left)
    ctrl_var_right = ctrl_mean_right * (1 - ctrl_mean_right)

    statistic = 0

    treatment_pairs = zip(leftNodeSummary[1:], rightNodeSummary[1:])
    for left_entry, right_entry in treatment_pairs:
        # Treatment group: mean and size in the left and right child nodes.
        trt_mean_left, trt_size_left = left_entry[0], left_entry[1]
        trt_mean_right, trt_size_right = right_entry[0], right_entry[1]

        # Same p * (1 - p) variance form as for the control group.
        trt_var_left = trt_mean_left * (1 - trt_mean_left)
        trt_var_right = trt_mean_right * (1 - trt_mean_right)

        # The four cells, always ordered: treated-left, treated-right,
        # control-left, control-right.
        cell_sizes = [trt_size_left, trt_size_right,
                      ctrl_size_left, ctrl_size_right]
        cell_vars = [trt_var_left, trt_var_right,
                     ctrl_var_left, ctrl_var_right]

        # Each cell's variance is weighted by its share of the total (n - 1).
        dof_total = np.sum([size - 1 for size in cell_sizes])
        weights = [(size - 1) / dof_total for size in cell_sizes]

        # Pooled estimator of the constant variance (as a standard deviation).
        pooled_sd = np.sqrt(
            np.sum([wt * var for wt, var in zip(weights, cell_vars)])
        )

        # Difference between the left-node and right-node treatment effects.
        effect_gap = ((trt_mean_left - ctrl_mean_left)
                      - (trt_mean_right - ctrl_mean_right))
        std_error = pooled_sd * np.sqrt(
            np.sum([1 / size for size in cell_sizes])
        )

        # NOTE(review): this assignment overwrites the previous value, so with
        # several treatment groups only the last pair determines the result.
        # Confirm whether accumulating across groups was intended.
        statistic = np.power(effect_gap / std_error, 2)

    return statistic