in causalPartition.py [0:0]
def split_exposure_hajek(self, separate, outcome, feature_set, max_attempt=30, eps=0.0, delta=0.0,
                         df_train=None, probabilities=None, criteria=None):
    """
    Entry point for splitting (partitioning) the training set.

    Fits a baseline regression on the training data to obtain the total sum of
    squared residuals, then delegates to the recursive splitter
    (``_split_exposure_hajek`` or ``_split_exposure_hajek_eht``) starting at
    node 1, and finally annotates the returned root node with summary
    statistics.

    separate: ``True`` = separate trees (``_split_exposure_hajek`` is used and the
        result is also stored in ``self.result_separate``); otherwise
        ``_split_exposure_hajek_eht`` is used.
    outcome: name of the outcome column in ``self.df_train``.
    feature_set: a list of features used to partition (may include ``assignment'').
    max_attempt: forwarded to the splitter. Per the original notes this is a
        sample threshold, where a larger value tends to overfit more.
    eps: forwarded to the splitter. Presumably a tolerance guarding against
        trivial (near 0 or near 1) probabilities -- TODO confirm in the splitter.
    delta: forwarded to the splitter; same caveat as ``eps``.
    df_train: leave it as None. The value is ignored; ``self.df_train`` is
        always used.
    probabilities: leave it as None. The value is ignored;
        ``self.probabilities`` is always used.
    criteria: dict of splitting criteria forwarded to the splitter. Defaults
        to ``{'non_trivial_reduction': 0}``; a fresh dict is created on every
        call so that state cannot leak between calls.

    Returns the root-node result produced by the splitter, with three extra
    keys: ``'N'`` (number of training rows), ``'hajek'`` (point estimate) and
    ``'hajek_se'`` (its standard error).
    """
    if criteria is None:
        # Built per call: a dict literal in the signature would be one shared
        # object reused (and possibly mutated) across every invocation.
        criteria = {'non_trivial_reduction': 0}

    # The df_train / probabilities arguments are intentionally overridden, as
    # in the original implementation.
    df_train = self.df_train            # training set
    probabilities = self.probabilities  # probability tensor
    # Restrict every relevant probability array to the training indices.
    probabilities_train = {
        key: probabilities[key][self.idx_tr]
        for key in [self.treatment] + feature_set
    }
    n_train = len(df_train)

    # `== True` (rather than plain truthiness) is kept so that non-bool
    # arguments select the same branch they always did.
    if separate == True:  # noqa: E712
        # Intercept-only fit: residuals are deviations from the outcome mean.
        res = sm.WLS(df_train[outcome], np.ones(n_train)).fit()
        total_sse = np.sum(res.resid ** 2)  # total sse
        train_result = self._split_exposure_hajek(1, df_train, probabilities_train, feature_set, max_attempt,
                                                  eps, delta, outcome, [],
                                                  n_train, total_sse, criteria)
        train_result['N'] = n_train
        train_result['hajek'] = df_train[outcome].mean()
        # NOTE(review): pandas ``std`` already uses ddof=1 and is then divided
        # by sqrt(n - 1); kept exactly as in the original.
        train_result['hajek_se'] = df_train[outcome].std() / np.sqrt(n_train - 1)
        self.result_separate = train_result
        return train_result

    # Regress the outcome on a constant plus the treatment column.
    res = sm.WLS(df_train[outcome], sm.add_constant(df_train[self.treatment])).fit()
    # NOTE(review): the factor of 2 is carried over from the original; its
    # rationale is not visible in this method.
    total_sse = np.sum(res.resid ** 2) * 2
    train_result_eht = self._split_exposure_hajek_eht(1, df_train, probabilities_train, feature_set, max_attempt,
                                                      eps, delta, outcome, [], n_train, total_sse, criteria)
    train_result_eht['N'] = n_train
    # Take the second coefficient (the treatment term) by position. Going
    # through ndarray avoids integer indexing on a label-indexed Series, which
    # pandas has deprecated.
    train_result_eht['hajek'] = np.asarray(res.params)[1]
    train_result_eht['hajek_se'] = np.asarray(res.bse)[1]
    return train_result_eht