in src/sagemaker_sklearn_extension/preprocessing/encoders.py [0:0]
def transform(self, X):
    """Transform each column of `X` using the Weight-of-Evidence encoding.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Data to encode. It is validated with ``check_array`` and must have
        the same number of columns as recorded in ``self._dim``.

    Returns
    -------
    X_encoded: array, shape (n_samples, n_encoded_features)
        Array with each of the encoded columns.

    Raises
    ------
    ValueError
        If the number of columns of `X` differs from ``self._dim``.
    AssertionError
        If a column contains a value that is not a key of the codex stored
        for that column in ``self.woe_pairs_``.
    """
    # The encoder must have been fitted (``woe_pairs_`` is the marker).
    check_is_fitted(self, "woe_pairs_")
    # Validate the input and its dimensionality.
    X = check_array(X)
    if X.shape[1] != self._dim:
        raise ValueError(f"The input dimension is {X.shape[1]} instead of the expected {self._dim}")
    # When binning is enabled, the columns are first passed through the binner.
    Xp = self.binner_.transform(X) if self.binning else X
    Xe = np.zeros(Xp.shape)
    for i, x in enumerate(Xp.T):
        # One (codex, woe) pair per column: ``codex`` maps a category to a
        # position and ``woe`` is indexed by that position.
        codex, woe = self.woe_pairs_[i]
        # Reject data containing categories that the codex does not know.
        # This is an explicit raise rather than an ``assert`` statement so the
        # check is not stripped when Python runs with ``-O``; the exception
        # type and message are the same as before.
        if any(e not in codex for e in np.unique(x)):
            raise AssertionError(WOEAsserts.UNSEEN_CAT)
        # Build the encoded column by looking every value up through the codex.
        Xe[:, i] = np.array([woe[codex[xi]] for xi in x])
    return Xe