def _split_exposure_validate()

in causalPartition.py [0:0]


    def _split_exposure_validate(self, node_id, df_est, result,
                                 probabilities_est, rules, outcome, eps=0.005):
        """Re-estimate a fitted partition tree on the estimation sample.

        Walks ``result`` recursively. For every node, the rows of ``df_est``
        that satisfy all of ``rules`` are selected, rows whose estimated
        propensity is not above ``eps`` are dropped, and summary statistics
        of ``outcome`` are computed on what remains.

        Parameters
        ----------
        node_id : int
            Heap-style node index: the root is 1 and the children of node
            ``k`` are ``2*k`` (left) and ``2*k + 1`` (right).
        df_est :
            Table indexed by column name and by boolean mask (presumably a
            pandas DataFrame -- confirm against the caller).
        result : dict
            Tree node. An internal node has the keys ``'feature'``,
            ``'threshold'``, ``'left_result'`` and ``'right_result'``; a node
            without ``'left_result'`` is treated as a leaf.
        probabilities_est :
            Mapping from feature name to an array whose first axis lines up
            with the rows of ``df_est``; the rule indicators are averaged
            over axis 1 (assumed to be simulated draws -- TODO confirm).
        rules : list of (feature, sign, threshold)
            Path from the root to this node. ``sign == 0`` means
            ``feature <= threshold``; ``sign == 1`` means
            ``feature > threshold``.
        outcome : str
            Name of the outcome column in ``df_est``.
        eps : float, optional
            Rows with propensity ``<= eps`` are discarded.

        Returns
        -------
        dict
            ``'hajek'`` (intercept of an unweighted OLS of the outcome on a
            constant), ``'hajek_se'``, ``'mse'`` (squared residuals weighted
            by inverse propensity, summed) and ``'N'`` (rows used). Internal
            nodes additionally carry ``'feature'``, ``'threshold'``,
            ``'left_result'`` and ``'right_result'``.
        """
        est_result = {}
        if 'left_result' in result:
            feature = result['feature']
            threshold = result['threshold']
            est_result['feature'] = feature
            est_result['threshold'] = threshold
            # Children inherit the current path plus the new split condition.
            est_result['left_result'] = self._split_exposure_validate(
                node_id * 2, df_est, result['left_result'], probabilities_est,
                rules + [(feature, 0, threshold)], outcome, eps)
            est_result['right_result'] = self._split_exposure_validate(
                node_id * 2 + 1, df_est, result['right_result'], probabilities_est,
                rules + [(feature, 1, threshold)], outcome, eps)

        if rules:
            def _rule_product(column_of):
                # Element-wise product of the 0/1 indicators of every rule,
                # i.e. 1 exactly where all rules hold simultaneously.
                conditions = [column_of(key) <= th
                              for key, sign, th in rules if sign == 0]
                conditions += [column_of(key) > th
                               for key, sign, th in rules if sign == 1]
                # np.prod, not np.product: the alias was removed in NumPy 2.0.
                return np.prod(conditions, axis=0)

            idxs = _rule_product(lambda key: df_est[key]) > 0
            dff = df_est[idxs]
            # Share of entries along axis 1 in which the selected rows satisfy
            # every rule.
            propensities = np.mean(
                _rule_product(lambda key: probabilities_est[key][idxs]), axis=1)
            # Drop rows with (near-)zero propensity so 1/propensity stays bounded.
            idxs_filter = propensities > eps
            dff = dff[idxs_filter]
            propensities = propensities[idxs_filter]
        else:
            dff = df_est
            propensities = np.ones(len(dff))

        # Intercept-only regression: the fitted parameter is the sample mean
        # and the residuals are deviations from it.
        mod = sm.OLS(dff[outcome], np.ones(len(dff)))
        res = mod.fit()
        mse = np.sum((res.resid ** 2) * 1.0 / propensities)
        # Positional access through NumPy: params may be a label-indexed
        # pandas Series, on which ``params[0]`` is deprecated.
        average_hajek = np.asarray(res.params)[0]

        if node_id == 1:
            # Root node: plain standard error of the mean.
            average_hajek_se = dff[outcome].std() / np.sqrt(len(dff) - 1)
        else:
            average_hajek_se = self._hajek_se(dff, propensities, outcome)

        est_result['hajek'] = average_hajek
        est_result['hajek_se'] = average_hajek_se
        est_result['mse'] = mse
        est_result['N'] = len(dff)
        return est_result