def _split_exposure_validate_eht()

in causalPartition.py [0:0]
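
The weighted regression inside this method is one way to obtain the Hajek estimate of the effect within a node: treated units are weighted by the inverse of their probability of satisfying the node's exposure rules under treatment, control units by the inverse of the corresponding control probability. As a sketch (the notation below is assumed, not taken from the code), with T_i the treatment indicator, Y_i the outcome, and p_{1,i}, p_{0,i} the per-unit propensities stored in propensities_1 and propensities_0, the coefficient on the treatment in the WLS fit equals

    \hat{\tau}_{\mathrm{Hajek}}
      = \frac{\sum_i T_i Y_i / p_{1,i}}{\sum_i T_i / p_{1,i}}
      - \frac{\sum_i (1 - T_i) Y_i / p_{0,i}}{\sum_i (1 - T_i) / p_{0,i}}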


    def _split_exposure_validate_eht(self, node_id, df_est, result, probabilities_est, rules, outcome, eps=0.005):
        """
        Validate a fitted exposure split tree on the estimation set (non-separate case):
        at each node, re-estimate the Hajek (inverse-propensity weighted WLS) treatment
        effect, its standard error, the weighted MSE, and the node sample size.
        """
        est_result = {}
        if 'left_result' in result:
            # Internal node: record the split and recurse into both children, extending the
            # rule path; side 0 means feature <= threshold, side 1 means feature > threshold.
            est_result['feature'] = result['feature']
            est_result['threshold'] = result['threshold']
            est_result['left_result'] = self._split_exposure_validate_eht(node_id*2, df_est, result['left_result'], probabilities_est,
                                                       rules+[(result['feature'], 0, result['threshold'])], outcome, eps)
            est_result['right_result'] = self._split_exposure_validate_eht(node_id*2+1, df_est, result['right_result'], probabilities_est,
                                                         rules+[(result['feature'], 1, result['threshold'])], outcome, eps)
        
        if rules:
            # Not the root: keep only estimation-set rows that satisfy every rule on the path to this node.
            idxs = np.prod([df_est[key] <= th for key, sign, th in rules if sign == 0] + \
                   [df_est[key] > th for key, sign, th in rules if sign == 1],
                   axis=0) > 0
            dff = df_est[idxs]
        else:
            idxs = np.ones(len(df_est)).astype(bool)
            dff = df_est
            
        # Per-unit propensity of satisfying this node's rules under treatment == 1 and
        # treatment == 0, averaged over the simulated draws in probabilities_est (axis=1).
        propensities_1 = np.mean(np.prod([probabilities_est[key][idxs] <= th for key, sign, th in rules if sign == 0] + \
               [probabilities_est[key][idxs] > th for key, sign, th in rules if sign == 1] + \
               [probabilities_est[self.treatment][idxs] == 1],
               axis=0), axis=1)

        propensities_0 = np.mean(np.prod([probabilities_est[key][idxs] <= th for key, sign, th in rules if sign == 0] + \
               [probabilities_est[key][idxs] > th for key, sign, th in rules if sign == 1] + \
               [probabilities_est[self.treatment][idxs] == 0],
               axis=0), axis=1)
            
        # Positivity filter: drop units whose estimated propensity is zero in either arm.
        idxs_filter = np.logical_and(propensities_1 > 0, propensities_0 > 0)
        dff = dff[idxs_filter]
        propensities_1 = propensities_1[idxs_filter]
        propensities_0 = propensities_0[idxs_filter]
                
        # Hajek estimate via inverse-propensity weighted WLS: treated units get weight
        # 1/propensities_1 and control units 1/propensities_0, so the coefficient on the
        # treatment equals the difference of the weighted group means.
        mod = sm.WLS(dff[outcome], sm.add_constant(dff[self.treatment]),
                     weights=1.0 / propensities_1 * dff[self.treatment] + 1.0 / propensities_0 * (1 - dff[self.treatment]))
        res = mod.fit()
        mse = np.sum((res.resid ** 2) * (1.0 / propensities_1 * dff[self.treatment] + 1.0 / propensities_0 * (1 - dff[self.treatment])))

        average_hajek = res.params.iloc[1]
        average_hajek_se = res.bse.iloc[1]  # alternative: dff[outcome].std() / np.sqrt(len(dff)-1)
        
        est_result['hajek'] = average_hajek
        est_result['hajek_se'] = average_hajek_se
        est_result['mse'] = mse
        est_result['N'] = len(dff)
        return est_result
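
A minimal usage sketch follows. The surrounding object and input shapes are assumptions, not taken from this file: `partition` stands for a fitted causalPartition instance, `tree` for the split tree produced by the corresponding splitting routine, and `probs_est` for the dictionary of simulated probability arrays. The nested dict returned above can then be walked to print the per-node Hajek estimates.

    # Hypothetical helper; `partition`, `tree`, `df_est`, and `probs_est` are placeholders
    # for objects produced elsewhere in this repo.
    def print_hajek_tree(node, depth=0):
        """Walk the nested dict returned by _split_exposure_validate_eht."""
        indent = "  " * depth
        print(f"{indent}hajek={node['hajek']:.3f} "
              f"(se={node['hajek_se']:.3f}, mse={node['mse']:.1f}, N={node['N']})")
        if 'left_result' in node:
            print(f"{indent}split on {node['feature']} <= {node['threshold']}")
            print_hajek_tree(node['left_result'], depth + 1)
            print_hajek_tree(node['right_result'], depth + 1)

    # est_tree = partition._split_exposure_validate_eht(
    #     node_id=1, df_est=df_est, result=tree, probabilities_est=probs_est,
    #     rules=[], outcome='y')
    # print_hajek_tree(est_tree)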