in causalml/features.py [0:0]
def load_data(data, features, transformations={}):
"""Load data and set the feature matrix and label vector.
Args:
data (pandas.DataFrame): total input data
features (list of str): column names to be used in the inference model
transformation (dict of (str, func)): transformations to be applied to features
Returns:
X (numpy.matrix): a feature matrix
"""
df = data[features].copy()
bool_cols = [col for col in df.columns if df[col].dtype == bool]
df.loc[:, bool_cols] = df[bool_cols].astype(int)
for col, transformation in transformations.items():
logger.info("Applying {} to {}".format(transformation.__name__, col))
df[col] = df[col].apply(transformation)
cat_cols = [col for col in features if df[col].dtype == object]
num_cols = [col for col in features if col not in cat_cols]
logger.info("Applying one-hot-encoding to {}".format(cat_cols))
ohe = OneHotEncoder(min_obs=df.shape[0] * 0.001)
X_cat = ohe.fit_transform(df[cat_cols]).todense()
X = np.hstack([df[num_cols].values, X_cat])
return X