in causalPartition.py [0:0]
def _split_exposure_validate(self, node_id, df_est, result,
                             probabilities_est, rules, outcome, eps=0.005):
    """Re-estimate a fitted partition tree on the estimation sample.

    Mirrors the structure of ``result`` and, for every node, computes the
    inverse-propensity-weighted (Hajek) mean of ``outcome`` over the rows of
    ``df_est`` that satisfy all of the node's ``rules``.

    Args:
        node_id: Heap-style node index; the children of node ``k`` are
            visited as ``2 * k`` and ``2 * k + 1``. Node ``1`` is treated
            specially when computing the standard error (see below).
        df_est: Table indexed by column name that holds the splitting
            features and the ``outcome`` column.
        result: Nested dict describing the tree. A node that contains the
            key ``'left_result'`` is an internal node and must also provide
            ``'feature'``, ``'threshold'`` and ``'right_result'``.
        probabilities_est: Mapping from feature name to an array that is
            row-aligned with ``df_est``.
            NOTE(review): presumably shaped (n_rows, n_draws) with one
            column per simulated assignment; axis 1 is averaged to obtain
            each row's propensity. Confirm against the caller.
        rules: List of ``(feature, sign, threshold)`` tuples accumulated on
            the path to this node. ``sign == 0`` means
            ``feature <= threshold``; ``sign == 1`` means
            ``feature > threshold``.
        outcome: Name of the outcome column in ``df_est``.
        eps: Rows whose estimated propensity is not greater than ``eps``
            are dropped before estimation.

    Returns:
        dict with keys ``'hajek'`` (weighted mean), ``'hajek_se'``,
        ``'mse'`` (inverse-propensity-weighted sum of squared residuals)
        and ``'N'`` (number of rows used). Internal nodes additionally
        carry ``'feature'``, ``'threshold'``, ``'left_result'`` and
        ``'right_result'``.
    """
    est_result = {}

    if 'left_result' in result:
        feature = result['feature']
        threshold = result['threshold']
        est_result['feature'] = feature
        est_result['threshold'] = threshold
        est_result['left_result'] = self._split_exposure_validate(
            node_id * 2, df_est, result['left_result'], probabilities_est,
            rules + [(feature, 0, threshold)], outcome, eps)
        est_result['right_result'] = self._split_exposure_validate(
            node_id * 2 + 1, df_est, result['right_result'], probabilities_est,
            rules + [(feature, 1, threshold)], outcome, eps)

    if rules:
        # Rows of df_est that satisfy every rule on the path to this node.
        # np.prod is used because the np.product alias was removed in
        # NumPy 2.0.
        idxs = np.prod(
            [df_est[key] <= th for key, sign, th in rules if sign == 0]
            + [df_est[key] > th for key, sign, th in rules if sign == 1],
            axis=0) > 0
        dff = df_est[idxs]

        # Apply the same rules to the per-row probability arrays and average
        # over axis 1: the share of columns in which a row lands in this node.
        propensities = np.mean(
            np.prod(
                [probabilities_est[key][idxs] <= th
                 for key, sign, th in rules if sign == 0]
                + [probabilities_est[key][idxs] > th
                   for key, sign, th in rules if sign == 1],
                axis=0),
            axis=1)

        # Drop rows with (near-)zero propensity so 1 / propensity stays finite.
        keep = propensities > eps
        dff = dff[keep]
        propensities = propensities[keep]
    else:
        # No rules: every row belongs to the node with weight 1.
        dff = df_est
        propensities = np.ones(len(dff))

    # Hajek estimator: inverse-propensity-weighted mean, normalised by the sum
    # of the weights. This equals the intercept of a weighted least-squares
    # fit on a constant, so no regression object is required.
    y = np.asarray(dff[outcome], dtype=float)
    weights = 1.0 / propensities
    average_hajek = np.sum(weights * y) / np.sum(weights)
    mse = np.sum(weights * (y - average_hajek) ** 2)

    if node_id == 1:
        # Node 1 uses the plain standard error of the mean.
        average_hajek_se = dff[outcome].std() / np.sqrt(len(dff) - 1)
    else:
        average_hajek_se = self._hajek_se(dff, propensities, outcome)

    est_result['hajek'] = average_hajek
    est_result['hajek_se'] = average_hajek_se
    est_result['mse'] = mse
    est_result['N'] = len(dff)

    return est_result