in src/sagemaker_sklearn_extension/preprocessing/encoders.py [0:0]
def fit(self, X, y):
    """Fit Weight of Evidence encoder to `X` and `y`.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        The data to encode.

    y : array-like, shape (n_samples,)
        The binary target vector.

    Returns
    -------
    self : WOEEncoder
        The fitted encoder.
    """
    # Validate hyper-parameters.
    # NOTE(review): `assert` is stripped under `python -O`; kept as-is to
    # preserve the existing AssertionError contract with WOEAsserts messages.
    if self.binning:
        assert self.binning in ("uniform", "quantile", "kmeans"), WOEAsserts.BINNING
        # n_bins is only meaningful when binning is enabled.
        assert self.n_bins >= 2, WOEAsserts.NBINS
    assert self.alpha >= 0, WOEAsserts.ALPHA
    # Validate data (converts to 2-D numeric array, checks consistent lengths).
    X, y = check_X_y(X, y)
    # Keep track of number of features encoded so transform() can verify input width.
    self._dim = X.shape[1]
    # Recover the target categories and check there's only two.
    cat_y = np.unique(y)
    # It should be == 2 but relax to <= 2 for a single-sample test by check_estimator.
    assert len(cat_y) <= 2, WOEAsserts.BINARY
    # Additive (Laplace) smoothing term applied to both class counts; zero when
    # alpha == 0, i.e. no smoothing.
    beta = 2 * self.alpha * self.laplace
    # Mask of rows belonging to the first target class, and its count.
    # np.count_nonzero runs at C speed, unlike builtin sum() over a bool array.
    mask_y_0 = y == cat_y[0]
    count_y_0 = np.count_nonzero(mask_y_0)
    if self.binning:
        # Discretize continuous features to ordinal bin indices before WOE.
        self.binner_ = KBinsDiscretizer(n_bins=self.n_bins, strategy=self.binning, encode="ordinal")
        Xp = self.binner_.fit_transform(X)
    else:
        Xp = X
    # Compute the WOE statistics column by column.
    self.woe_pairs_ = [self._woe(col, count_y_0, mask_y_0, beta) for col in Xp.T]
    return self