def group_uniqueCounts_to_arr()

in causalml/inference/tree/uplift.pyx [0:0]


def group_uniqueCounts_to_arr(np.ndarray[TR_TYPE_t, ndim=1] treatment_idx,
                              np.ndarray[Y_TYPE_t, ndim=1] y,
                              np.ndarray[N_TYPE_t, ndim=1] out_arr):
    '''
    Count negative and positive outcomes per experiment group, in place.

    Args
    ----
    treatment_idx : array-like, shape = [num_samples]
        An array containing the treatment group index for each unit.
        Should be of type numpy.int8 (must match TR_TYPE_t).
        Every value is assumed to lie in [0, n_class); it is not checked.
    y : array-like, shape = [num_samples]
        An array containing the outcome of interest for each unit.
        Should be of type numpy.int8 (must match Y_TYPE_t).
        Values are assumed to be 0 or 1 and are not checked; at least
        num_samples entries are read.
    out_arr : array-like, shape = [2 * n_class]
        An array to store the output counts, should have type numpy.int32
        (must match N_TYPE_t). Any previous content is overwritten.

    Returns
    -------
    No return value, but modifies out_arr to hold the negative and positive
    outcome sample sizes for each of the control and treatment groups:
        out_arr[2*i]   is N(Y = 0, T = i) for i = 0, ..., n_class - 1
        out_arr[2*i+1] is N(Y = 1, T = i) for i = 0, ..., n_class - 1
    '''
    cdef int out_arr_len = out_arr.shape[0]
    # Floor division: `/` would be true (floating-point) division under
    # Python-3 language-level semantics, which is not what is wanted here.
    cdef int n_class = out_arr_len // 2
    cdef int num_samples = treatment_idx.shape[0]
    cdef int tv = 0
    cdef int i = 0

    # Reset every slot so counts from a previous call cannot leak through.
    for i in range(out_arr_len):
        out_arr[i] = 0

    # Single pass over the samples. Slot 2*t temporarily holds N(T = t) and
    # slot 2*t + 1 holds N(Y = 1, T = t); the even slots are corrected below.
    for i in range(num_samples):
        tv = treatment_idx[i]
        # Treatment index is assumed to be in range (no bounds validation).
        out_arr[2*tv] += 1
        # With y in {0, 1}, adding y[i] counts the positive outcomes.
        out_arr[2*tv + 1] += y[i]

    # N(Y = 0, T = i) = N(T = i) - N(Y = 1, T = i)
    for i in range(n_class):
        out_arr[2*i] -= out_arr[2*i + 1]