in causalPartition.py [0:0]
def _split_exposure_validate_eht(self, node_id, df_est, result, probabilities_est, rules, outcome, eps=0.005):
"""
estimation set for non-separate case
"""
est_result = {}
if 'left_result' in result:
est_result['feature'] = result['feature']
est_result['threshold'] = result['threshold']
est_result['left_result'] = self._split_exposure_validate_eht(node_id*2, df_est, result['left_result'], probabilities_est,
rules+[(result['feature'], 0, result['threshold'])], outcome, eps)
est_result['right_result'] = self._split_exposure_validate_eht(node_id*2+1, df_est, result['right_result'], probabilities_est,
rules+[(result['feature'], 1, result['threshold'])], outcome, eps)
if rules:
# if this is not the root
idxs = np.product([df_est[key] <= th for key, sign, th in rules if sign == 0] + \
[df_est[key] > th for key, sign, th in rules if sign == 1],
axis=0) > 0
dff = df_est[idxs]
else:
idxs = np.ones(len(df_est)).astype(bool)
dff = df_est
propensities_1 = np.mean(np.product([probabilities_est[key][idxs] <= th for key, sign, th in rules if sign == 0] + \
[probabilities_est[key][idxs] > th for key, sign, th in rules if sign == 1]+\
[probabilities_est[self.treatment][idxs] == 1],
axis=0), axis=1)
propensities_0 = np.mean(np.product([probabilities_est[key][idxs] <= th for key, sign, th in rules if sign == 0] + \
[probabilities_est[key][idxs] > th for key, sign, th in rules if sign == 1]+\
[probabilities_est[self.treatment][idxs] == 0],
axis=0), axis=1)
idxs_filter = np.logical_and(propensities_1 > 0, propensities_0 > 0)
dff = dff[idxs_filter]
propensities_1 = propensities_1[idxs_filter]
propensities_0 = propensities_0[idxs_filter]
mod = sm.WLS(dff[outcome], sm.add_constant(dff[self.treatment]),
weights=1.0 / propensities_1 * dff[self.treatment] + 1.0 / propensities_0 * (1-dff[self.treatment]))
res = mod.fit()
mse = np.sum((res.resid ** 2) * (1.0 / propensities_1 * dff[self.treatment] + 1.0 / propensities_0 * (1-dff[self.treatment])))
average_hajek = res.params[1]
average_hajek_se = res.bse[1] # dff[outcome].std() / np.sqrt(len(dff)-1)
est_result['hajek'] = average_hajek
est_result['hajek_se'] = average_hajek_se
est_result['mse'] = mse
est_result['N'] = len(dff)
return est_result