in causalml/inference/tree/models.py [0:0]
def normI(self, currentNodeSummary, leftNodeSummary, rightNodeSummary, control_name, alpha=0.9):
    '''
    Compute the normalization factor used to penalize unbalanced splits.

    Blends (weighted by ``alpha``) the KL divergence between the fractions of
    treatment and control samples routed to the left child with per-treatment
    divergence and entropy terms, plus a constant offset.

    Args
    ----
    currentNodeSummary : dictionary
        The summary statistics of the current tree node.
    leftNodeSummary : dictionary
        The summary statistics of the left tree node.
    rightNodeSummary : dictionary
        The summary statistics of the right tree node (accepted for interface
        symmetry; not used in the computation).
    control_name : string
        The control group name.
    alpha : float
        The weight used to balance different normalization parts.

    Returns
    -------
    norm_res : float
        Normalization factor.
    '''
    # Control sample counts: overall and in the left child.
    n_c = currentNodeSummary[control_name][1]
    n_c_left = leftNodeSummary[control_name][1]

    # Per-treatment sample counts, overall and in the left child (0 when the
    # treatment group does not appear in the left child at all).
    n_t = []
    n_t_left = []
    for group, stats in currentNodeSummary.items():
        if group == control_name:
            continue
        n_t.append(stats[1])
        n_t_left.append(leftNodeSummary[group][1] if group in leftNodeSummary else 0)

    n_t_total = np.sum(n_t)
    # Fractions sent to the left child; the +0.1 keeps the denominator
    # nonzero for empty groups.
    pt_a = 1. * np.sum(n_t_left) / (n_t_total + 0.1)
    pc_a = 1. * n_c_left / (n_c + 0.1)

    # Part 1: divergence between the pooled-treatment and control
    # left-fractions, weighted by the treatment/control mix entropy.
    norm_res = (
        alpha
        * self.entropyH(1. * n_t_total / (n_t_total + n_c), 1. * n_c / (n_t_total + n_c))
        * self.kl_divergence(pt_a, pc_a)
    )

    # Parts 2 & 3: the same divergence term per individual treatment group,
    # plus the entropy of each group's own left-fraction.
    for size_t, size_t_left in zip(n_t, n_t_left):
        pt_a_i = 1. * size_t_left / (size_t + 0.1)
        norm_res += (
            (1 - alpha)
            * self.entropyH(1. * size_t / (size_t + n_c), 1. * n_c / (size_t + n_c))
            * self.kl_divergence(1. * pt_a_i, pc_a)
        )
        norm_res += 1. * size_t / (n_t_total + n_c) * self.entropyH(pt_a_i)

    # Part 4: entropy of the control group's left-fraction.
    norm_res += 1. * n_c / (n_t_total + n_c) * self.entropyH(pc_a)
    # Part 5: constant offset.
    norm_res += 0.5
    return norm_res