in causalml/inference/tree/uplift.pyx [0:0]
def group_uniqueCounts_to_arr(np.ndarray[TR_TYPE_t, ndim=1] treatment_idx,
                              np.ndarray[Y_TYPE_t, ndim=1] y,
                              np.ndarray[N_TYPE_t, ndim=1] out_arr):
    '''
    Count sample size by experiment group and binary outcome.

    Args
    ----
    treatment_idx : array-like, shape = [num_samples]
        An array containing the treatment group index for each unit.
        Should be of type numpy.int8
    y : array-like, shape = [num_samples]
        An array containing the outcome of interest for each unit.
        Should be of type numpy.int8
    out_arr : array-like, shape = [2 * n_class]
        An array to store the output counts, should have type numpy.int32

    Returns
    -------
    No return value; out_arr is overwritten in place to hold the negative and
    positive outcome sample sizes for each of the control and treatment groups.
        out_arr[2*i]   is N(Y = 0, T = i) for i = 0, ..., n_class - 1
        out_arr[2*i+1] is N(Y = 1, T = i) for i = 0, ..., n_class - 1

    Notes
    -----
    Inputs are not validated here: every value in treatment_idx is assumed to
    lie in [0, n_class), y is assumed to hold only 0/1 values, and y is
    assumed to have at least as many entries as treatment_idx.
    '''
    # Py_ssize_t matches the width of ndarray.shape entries, so sizes and
    # loop indices cannot be truncated for very large inputs.
    cdef Py_ssize_t out_arr_len = out_arr.shape[0]
    # Explicit floor division: plain `/` only means integer division under
    # some Cython language-level / cdivision settings.
    cdef Py_ssize_t n_class = out_arr_len // 2
    cdef Py_ssize_t num_samples = treatment_idx.shape[0]
    cdef Py_ssize_t tv = 0
    cdef Py_ssize_t i = 0

    # first clear the output
    for i in range(out_arr_len):
        out_arr[i] = 0

    # then loop through treatment_idx and y, sum the counts
    # first sum as N(T = i) and N(Y = 1, T = i) at index (2*i, 2*i+1),
    # and later adjust
    for i in range(num_samples):
        tv = treatment_idx[i]
        # assume treatment index is in range
        out_arr[2*tv] += 1
        # assume y should be either 0 or 1, so adding it counts the positives
        out_arr[2*tv + 1] += y[i]

    # adjust the entry at index 2*i to be
    # N(Y = 0, T = i) = N(T = i) - N(Y = 1, T = i)
    for i in range(n_class):
        out_arr[2*i] -= out_arr[2*i + 1]