def load_data()

in causalml/features.py [0:0]


def load_data(data, features, transformations=None):
    """Build the feature matrix from the selected columns of ``data``.

    Boolean columns are cast to integers, the optional per-column
    transformations are applied, object-dtype columns are one-hot encoded,
    and the encoded columns are stacked to the right of the remaining ones.

    Args:
        data (pandas.DataFrame): total input data
        features (list of str): column names to be used in the inference model
        transformations (dict of (str, func), optional): maps a column name to
            a function that is applied to every value of that column. Defaults
            to no transformations.

    Returns:
        X (numpy.matrix): a feature matrix; the non-categorical columns (in
            ``features`` order) followed by the one-hot-encoded columns
    """
    if transformations is None:
        transformations = {}

    # Work on a copy so the caller's DataFrame is never modified.
    df = data[features].copy()

    # Assign column by column: plain ``df[col] = ...`` always replaces the
    # column, whereas writing integers through ``df.loc[:, cols]`` tries to
    # keep the existing bool dtype, which newer pandas warns about / rejects.
    bool_cols = [col for col in df.columns if df[col].dtype == bool]
    for col in bool_cols:
        df[col] = df[col].astype(int)

    for col, transformation in transformations.items():
        # Not every callable has ``__name__`` (e.g. functools.partial objects).
        name = getattr(transformation, "__name__", repr(transformation))
        logger.info("Applying %s to %s", name, col)
        df[col] = df[col].apply(transformation)

    # Column dtypes are inspected after the transformations, so a
    # transformation can change which group a column ends up in.
    cat_cols = [col for col in features if df[col].dtype == object]
    cat_col_set = set(cat_cols)
    num_cols = [col for col in features if col not in cat_col_set]

    logger.info("Applying one-hot-encoding to %s", cat_cols)
    # min_obs is 0.1% of the row count.
    # NOTE(review): presumably the minimum number of observations a category
    # needs to get its own column -- confirm against OneHotEncoder.
    ohe = OneHotEncoder(min_obs=df.shape[0] * 0.001)
    X_cat = ohe.fit_transform(df[cat_cols]).todense()

    X = np.hstack([df[num_cols].values, X_cat])

    return X