in causalml/metrics/visualize.py [0:0]
def get_std_diffs(X, W, weight=None, weighted=False, numeric_threshold=5):
"""Calculate the inverse probability of treatment weighted standardized
differences in covariate means between the treatment and the control.
If weighting is set to 'False', calculate unweighted standardized
differences. Accepts only continuous and binary numerical variables.
"""
cont_cols, prop_cols = _get_numeric_vars(X, threshold=numeric_threshold)
cols = cont_cols + prop_cols
if len(cols) == 0:
raise ValueError(
"No variable passed the test for continuous or binary variables."
)
treat = W == 1
contr = W == 0
X_1 = X.loc[treat, cols]
X_0 = X.loc[contr, cols]
cont_index = np.array([col in cont_cols for col in cols])
prop_index = np.array([col in prop_cols for col in cols])
std_diffs_cont = np.empty(sum(cont_index))
std_diffs_prop = np.empty(sum(prop_index))
if weighted:
assert (
weight is not None
), 'weight should be provided when weighting is set to "True"'
weight_1 = weight[treat]
weight_0 = weight[contr]
X_1_mean, X_1_var = np.apply_along_axis(
lambda x: _get_wmean_wvar(x, weight_1), 0, X_1
)
X_0_mean, X_0_var = np.apply_along_axis(
lambda x: _get_wmean_wvar(x, weight_0), 0, X_0
)
elif not weighted:
X_1_mean, X_1_var = np.apply_along_axis(lambda x: _get_mean_var(x), 0, X_1)
X_0_mean, X_0_var = np.apply_along_axis(lambda x: _get_mean_var(x), 0, X_0)
X_1_mean_cont, X_1_var_cont = X_1_mean[cont_index], X_1_var[cont_index]
X_0_mean_cont, X_0_var_cont = X_0_mean[cont_index], X_0_var[cont_index]
std_diffs_cont = (X_1_mean_cont - X_0_mean_cont) / np.sqrt(
(X_1_var_cont + X_0_var_cont) / 2
)
X_1_mean_prop = X_1_mean[prop_index]
X_0_mean_prop = X_0_mean[prop_index]
std_diffs_prop = (X_1_mean_prop - X_0_mean_prop) / np.sqrt(
((X_1_mean_prop * (1 - X_1_mean_prop)) + (X_0_mean_prop * (1 - X_0_mean_prop)))
/ 2
)
std_diffs = np.concatenate([std_diffs_cont, std_diffs_prop], axis=0)
std_diffs_df = pd.DataFrame(std_diffs, index=cols)
return std_diffs_df