in causalml/inference/tree/utils.py [0:0]
def cat_continuous(x, granularity="Medium"):
    """
    Categorize (bin) continuous variable based on percentile.

    Args
    ----
    x : list
        Feature values.
    granularity : string, optional, (default = 'Medium')
        Control the granularity of the bins, optional values are: 'High', 'Medium', 'Low'.
        'High' cuts at p5, p10, ..., p95 and p99; 'Medium' cuts at the deciles
        p10, ..., p90. Any other value is treated as 'Low', which cuts at
        p15, p50 and p85.

    Returns
    -------
    res : list
        List of percentile bins for the feature value, one label per element
        of ``x`` and in the same order. For 'High' and 'Medium' a label is
        ``"<= pN (value)"`` for the first cut point the element does not
        exceed, or ``"> pN (value)"`` (N being the last cut point) when it
        exceeds all of them. For 'Low' the labels are "1-Very Low", "2-Low",
        "3-High" and "4-Very High".
    """
    if granularity == "High":
        cut_points = (
            5, 10, 15, 20, 25, 30, 35, 40, 45, 50,
            55, 60, 65, 70, 75, 80, 85, 90, 95, 99,
        )
    elif granularity == "Medium":
        cut_points = (10, 20, 30, 40, 50, 60, 70, 80, 90)
    else:
        # 'Low' (and any unrecognized value): four named bins with
        # exclusive upper bounds.
        bounds = [np.percentile(x, q) for q in (15, 50, 85)]
        names = ("1-Very Low", "2-Low", "3-High")
        return [
            next(
                (name for bound, name in zip(bounds, names) if z < bound),
                "4-Very High",
            )
            for z in x
        ]

    # Derive every label from the percentile number it is compared against so
    # that labels and thresholds can never drift apart (the previous 'High'
    # implementation labelled p5 as "p10" and sent everything above p45 to
    # "> p90").
    bounds = [np.percentile(x, q) for q in cut_points]
    labels = ["<= p%d (%s)" % (q, bound) for q, bound in zip(cut_points, bounds)]
    overflow = "> p%d (%s)" % (cut_points[-1], bounds[-1])

    # Upper bounds are inclusive; the first matching cut point wins. Values
    # that match none (including NaN comparisons) fall into the overflow bin.
    return [
        next((label for bound, label in zip(bounds, labels) if z <= bound), overflow)
        for z in x
    ]