def divideSet_len()

in causalml/inference/tree/uplift.pyx [0:0]


    def divideSet_len(X, treatment_idx, y, column, value):
        '''Tree node split that reports node sizes instead of the split covariates.

        Modified from divideSet(), but returns len(X_l) and len(X_r)
        instead of the split X_l and X_r, to avoid some overhead. Intended
        to be used while searching for the best split; once the best split
        is found, the data can be split for real to obtain X_l and X_r.

        Rows go to the left node when the split condition holds:
        ``X[:, column] >= value`` for numeric values, and
        ``X[:, column] == value`` otherwise (e.g. strings).

        Args
        ----
        X : ndarray, shape = [num_samples, num_features]
            An ndarray of the covariates used to train the uplift model.
        treatment_idx : ndarray, shape = [num_samples]
            An array containing the treatment group index for each unit.
            Must support boolean-mask indexing.
        y : ndarray, shape = [num_samples]
            An array containing the outcome of interest for each unit.
            Must support boolean-mask indexing.
        column : int
            The column used to split the data.
        value : float, int, str or numpy scalar
            The value in the column for splitting the data.

        Returns
        -------
        (len_X_l, len_X_r, treatment_l, treatment_r, y_l, y_r) : tuple
            The number of rows, the treatments and the outcomes of the
            left node and the right node.

        '''
        # Derive the dtype via np.asarray so that plain Python scalars
        # (float/int/str), which have no ``.dtype`` attribute, are handled
        # the same way as NumPy scalars.
        if np.issubdtype(np.asarray(value).dtype, np.number):
            # for int and float values
            filt = X[:, column] >= value
        else:  # for strings
            filt = X[:, column] == value

        # Compute the complement once; it is used for both right-node arrays.
        not_filt = ~filt

        len_X_l = np.sum(filt)
        return (
            len_X_l,
            len(X) - len_X_l,
            treatment_idx[filt],
            treatment_idx[not_filt],
            y[filt],
            y[not_filt],
        )