def cat_continuous()

in causalml/inference/tree/utils.py [0:0]


def cat_continuous(x, granularity='Medium'):
    '''
    Categorize (bin) continuous variable based on percentile.

    Each value is mapped to the label of the first percentile cut point it
    does not exceed; values above the last cut point get a '> pNN' label.

    Args
    ----

    x : list
        Feature values.

    granularity : string, optional, (default = 'Medium')
        Control the granularity of the bins, optional values are: 'High', 'Medium', 'Low'.

        * 'High'   : cut points at p5, p10, ..., p95 and p99 (21 bins).
        * 'Medium' : cut points at p10, p20, ..., p90 (10 bins).
        * 'Low'    : cut points at p15, p50, p85 (4 bins, labelled
          '1-Very Low' .. '4-Very High').

        Any other value is treated like 'Low'.

    Returns
    -------
    res : list
        List of percentile bins for the feature value. For 'High' and
        'Medium' each label has the form '<= pNN (cut value)' or
        '> pNN (cut value)'. An empty input yields an empty list.
    '''
    # Nothing to bin; also avoids asking numpy for percentiles of no data.
    if len(x) == 0:
        return []

    if granularity == 'High':
        ranks = list(range(5, 100, 5)) + [99]
    elif granularity == 'Medium':
        ranks = list(range(10, 100, 10))
    else:
        # Coarse scheme: strict '<' comparisons against p15 / p50 / p85.
        lspercentile = np.percentile(x, [15, 50, 85])
        names = ['1-Very Low', '2-Low', '3-High']
        return [
            next((name for name, bound in zip(names, lspercentile) if z < bound),
                 '4-Very High')
            for z in x
        ]

    # One vectorised call instead of one np.percentile call per cut point.
    lspercentile = np.percentile(x, ranks)

    # Build each label from the rank that was actually computed, so the text
    # 'pNN' always matches the cut value it is printed with.
    labels = ['<= p%d (%s)' % (rank, bound)
              for rank, bound in zip(ranks, lspercentile)]
    overflow = '> p%d (%s)' % (ranks[-1], lspercentile[-1])

    res = []
    for z in x:
        for bound, label in zip(lspercentile, labels):
            if z <= bound:
                res.append(label)
                break
        else:
            # Above every cut point (or not comparable, e.g. NaN).
            res.append(overflow)
    return res