in econml/inference/_inference.py [0:0]
def summary_frame(self, alpha=0.05, value=0, decimals=3,
                  feature_names=None, output_names=None, treatment_names=None):
    """
    Output a dataframe summarizing the inference results.

    Parameters
    ----------
    alpha: optional float in [0, 1] (default=0.05)
        The overall level of confidence of the reported interval.
        The alpha/2, 1-alpha/2 confidence interval is reported.
    value: optional float (default=0)
        The mean value of the metric you'd like to test under null hypothesis.
    decimals: optional int (default=3)
        Number of decimal places to round each column to.
    feature_names: optional list of strings or None (default is None)
        The names of the features X. Only used when ``self.inf_type`` is
        ``'coefficient'``. Explicitly passed names are run through
        ``self.fname_transformer`` when one is set; otherwise
        ``self.feature_names`` is used as-is, and if that is also None the
        labels default to ``X0, X1, ...``.
    output_names: optional list of strings or None (default is None)
        The names of the outputs. Falls back to ``self.output_names`` and then
        to ``Y0, Y1, ...``.
    treatment_names: optional list of strings or None (default is None)
        The names of the treatments. Falls back to ``self.treatment_names`` and
        then to ``T0, T1, ...``.

    Returns
    -------
    output: pandas dataframe
        The output dataframe includes the point estimate and, when standard
        errors are available, standard error, z score, p value and confidence
        intervals of the estimated metric of each treatment on each outcome
        for each sample X[i]. Rows are indexed by the levels ``X``, ``Y`` and
        ``T``; the ``T`` level is dropped when there is a single treatment and
        the ``Y`` level is dropped when there is a single outcome.

    Raises
    ------
    AssertionError
        If ``output_names`` does not have ``self.d_y`` entries, or if
        ``self.d_t`` is set and ``treatment_names`` does not have
        ``self._d_t`` entries.
    """
    if treatment_names is None:
        treatment_names = self.treatment_names
    if output_names is None:
        output_names = self.output_names

    to_include = OrderedDict()
    to_include['point_estimate'] = self._reshape_array(self.point_estimate)
    # Number of rows per (outcome, treatment) pair: the number of samples X for
    # an effect, or the number of coefficients/intercepts otherwise.
    # to_include['point_estimate'] is a flattened vector of length d_t*d_y*nx.
    nx = to_include['point_estimate'].shape[0] // self._d_t // self.d_y

    if self.stderr is not None:
        ci_lower, ci_upper = self.conf_int(alpha=alpha)
        to_include['stderr'] = self._reshape_array(self.stderr)
        to_include['zstat'] = self._reshape_array(self.zstat(value))
        to_include['pvalue'] = self._reshape_array(self.pvalue(value))
        to_include['ci_lower'] = self._reshape_array(ci_lower)
        to_include['ci_upper'] = self._reshape_array(ci_upper)

    if output_names is None:
        output_names = ['Y' + str(i) for i in range(self.d_y)]
    # Explicit raise instead of an `assert` statement so the check still runs
    # under `python -O`; AssertionError is kept so existing callers that catch
    # it continue to work.
    if len(output_names) != self.d_y:
        raise AssertionError("Incompatible length of output names")

    if treatment_names is None:
        treatment_names = ['T' + str(i) for i in range(self._d_t)]

    if self.d_t:
        if len(treatment_names) != self._d_t:
            raise AssertionError("Incompatible length of treatment names")
        treatment_level = treatment_names
    else:
        # No treatment dimension: only the first treatment name is used.
        treatment_level = [treatment_names[0]]
    index = pd.MultiIndex.from_product([range(nx), output_names, treatment_level],
                                       names=['X', 'Y', 'T'])
    res = pd.DataFrame(to_include, index=index).round(decimals)

    # Replace the positional X labels with something meaningful for this result type.
    single_row_labels = {'intercept': 'cate_intercept', 'ate': 'ATE', 'att': 'ATT'}
    if self.inf_type == 'coefficient':
        if feature_names is None:
            feature_names = self.feature_names
        elif self.fname_transformer is not None:
            feature_names = self.fname_transformer(feature_names)
        if feature_names is None:
            feature_names = ['X' + str(i) for i in range(nx)]
        res.index = res.index.set_levels(feature_names, level="X")
    elif self.inf_type in single_row_labels:
        res.index = res.index.set_levels([single_row_labels[self.inf_type]], level="X")

    # Drop index levels that carry no information.
    if self._d_t == 1:
        res.index = res.index.droplevel("T")
    if self.d_y == 1:
        res.index = res.index.droplevel("Y")
    return res