in src/utils.py [0:0]
def _process_dataset(x: pd.DataFrame, y: np.ndarray, normalize: bool):
    """Turn a raw feature frame and target into model-ready arrays.

    Each column gets its own single-column pipeline inside a
    ``ColumnTransformer``: ``object``-dtype columns are ordinal-encoded,
    every other column is passed through unchanged. When ``normalize`` is
    true a ``StandardScaler`` step is appended to both kinds of pipeline, so
    the ordinal codes are standardised as well as the numeric columns.

    The caller's DataFrame is not modified: column labels are stringified on
    a shallow copy.

    Args:
        x: Feature table. Column labels of any type are accepted; they are
            converted to ``str`` before the transformers are built.
        y: Target values, forwarded unchanged to ``_encode_target``.
        normalize: Whether to append a ``StandardScaler`` to every column's
            pipeline.

    Returns:
        A ``(features, target)`` tuple: the fitted-and-transformed features
        cast to ``float``, and the result of ``_encode_target(y)``.
    """
    # Relabel a shallow copy so the string column names do not leak back into
    # the caller's frame; a shallow copy does not duplicate the data.
    x = x.copy(deep=False)
    # presumably needed because scikit-learn wants uniform string labels
    # -- TODO confirm
    x.columns = x.columns.astype(str)

    str_transform = [("ordinalEncoder", OrdinalEncoder())]
    numeric_transform = [("passthrough", "passthrough")]
    if normalize:
        str_transform.append(("StandardScaler", StandardScaler()))
        numeric_transform.append(("StandardScaler", StandardScaler()))

    # NOTE(review): only the plain ``object`` dtype is routed to the encoder;
    # any other dtype (including ``category`` or pandas string dtype) takes
    # the passthrough branch and must survive the final float cast.
    transformers = [
        (
            col,
            Pipeline(str_transform if x[col].dtype == "object" else numeric_transform),
            [col],
        )
        for col in x.columns
    ]

    # sparse_threshold=0 asks ColumnTransformer for a dense result, which is
    # then cast to float.
    x = (
        ColumnTransformer(transformers, sparse_threshold=0)
        .fit_transform(x)
        .astype(float)
    )
    y = _encode_target(y)
    return x, y