in causalml/inference/tree/uplift.pyx [0:0]
def divideSet(X, treatment_idx, y, column, value):
    '''
    Split the samples of a tree node into a left and a right partition.

    Rows satisfying the split condition go to the left partition, all
    remaining rows go to the right partition. The condition depends on the
    type of ``value``:

    * numeric ``value``: ``X[:, column] >= value``
    * any other ``value`` (e.g. a string): ``X[:, column] == value``

    Args
    ----
    X : ndarray, shape = [num_samples, num_features]
        An ndarray of the covariates used to train the uplift model.
    treatment_idx : ndarray, shape = [num_samples]
        An array containing the treatment group index for each unit.
    y : ndarray, shape = [num_samples]
        An array containing the outcome of interest for each unit.
    column : int
        The column used to split the data.
    value : float, int, str or numpy scalar
        The value in the column for splitting the data.

    Returns
    -------
    (X_l, X_r, treatment_l, treatment_r, y_l, y_r) : tuple of ndarray
        The covariates, treatments and outcomes of the left node and the
        right node.
    '''
    # Plain Python scalars have no ``dtype`` attribute, so derive the dtype
    # through ``np.asarray``; numpy scalars keep their own dtype unchanged.
    value_dtype = np.asarray(value).dtype

    feature = X[:, column]
    if np.issubdtype(value_dtype, np.number):
        # for int and float values
        filt = feature >= value
    else:
        # for strings (and any other non-numeric value)
        filt = feature == value

    # Invert the boolean mask once and reuse it for all three arrays.
    not_filt = ~filt
    return (X[filt], X[not_filt],
            treatment_idx[filt], treatment_idx[not_filt],
            y[filt], y[not_filt])