in causalml/features.py [0:0]
def _get_label_encoder_and_max(self, x):
"""Return a mapping from values and its maximum of a column to integer labels.
Args:
x (pandas.Series): a categorical column to encode.
Returns:
label_encoder (dict): mapping from values of features to integers
max_label (int): maximum label
"""
# NaN cannot be used as a key for dict. So replace it with a random integer.
label_count = x.fillna(NAN_INT).value_counts()
n_uniq = label_count.shape[0]
label_count = label_count[label_count >= self.min_obs]
n_uniq_new = label_count.shape[0]
# If every label appears more than min_obs, new label starts from 0.
# Otherwise, new label starts from 1 and 0 is used for all old labels
# that appear less than min_obs.
offset = 0 if n_uniq == n_uniq_new else 1
label_encoder = pd.Series(
np.arange(n_uniq_new) + offset, index=label_count.index
)
max_label = label_encoder.max()
label_encoder = label_encoder.to_dict()
return label_encoder, max_label