def cat_continuous()

in causalml/inference/tree/utils.py [0:0]


def cat_continuous(x, granularity="Medium"):
    """
    Categorize (bin) continuous variable based on percentile.

    Args
    ----

    x : list
        Feature values.

    granularity : string, optional, (default = 'Medium')
        Control the granularity of the bins, optional values are: 'High', 'Medium', 'Low'.

        * 'High'   : 21 bins cut at p5, p10, ..., p95 and p99.
        * 'Medium' : 10 bins cut at p10, p20, ..., p90.
        * 'Low'    : 4 bins cut at p15, p50 and p85.

        Any other value is treated as 'Low'.

    Returns
    -------
    res : list
        List of percentile bins for the feature value, one label per element
        of ``x`` and in the same order. For 'High' and 'Medium' a label is
        ``"<= p<q> (<cut-off>)"`` for the first percentile ``q`` whose cut-off
        is greater than or equal to the value, or ``"> p<q> (<cut-off>)"`` with
        the highest percentile when the value exceeds every cut-off. For 'Low'
        the labels are ``"1-Very Low"``, ``"2-Low"``, ``"3-High"`` and
        ``"4-Very High"``, assigned with strict ``<`` comparisons. An empty
        ``x`` yields an empty list.
    """
    # Nothing to bin; also avoids asking numpy for percentiles of no data.
    if len(x) == 0:
        return []

    if granularity == "High":
        # 5-point steps, plus p99 so the extreme upper tail gets its own bin.
        percentiles = list(range(5, 100, 5)) + [99]
    elif granularity == "Medium":
        percentiles = list(range(10, 100, 10))
    else:
        # 'Low' uses fixed names and strict '<' comparisons, so it does not
        # share the labelled-cut-off logic below.
        very_low_cut, low_cut, high_cut = np.percentile(x, [15, 50, 85])
        res = []
        for z in x:
            if z < very_low_cut:
                res.append("1-Very Low")
            elif z < low_cut:
                res.append("2-Low")
            elif z < high_cut:
                res.append("3-High")
            else:
                res.append("4-Very High")
        return res

    # One vectorised call instead of one pass over x per percentile.
    cutoffs = np.percentile(x, percentiles)

    # Build every label once; each label names the percentile it really uses.
    bins = [
        (cutoff, "<= p%d (%s)" % (pct, cutoff))
        for pct, cutoff in zip(percentiles, cutoffs)
    ]
    overflow = "> p%d (%s)" % (percentiles[-1], cutoffs[-1])

    # First cut-off that is >= z wins. A plain scan (rather than bisect) keeps
    # the original comparison semantics: a value for which every `<=` test is
    # False (e.g. NaN) lands in the overflow bin.
    return [
        next((label for cutoff, label in bins if z <= cutoff), overflow)
        for z in x
    ]