in clearbox/features.py [0:0]
def encode(srs: pd.Series) -> pd.Series:
    """Encode a `pd.Series` of hashable objects into a `pd.Series` of int IDs.

    This may be useful if the feature you want to group on in the cross
    validation is represented by strings or any other non-int hashable
    objects.

    IDs are consecutive integers starting at 0. When the distinct values of
    `srs` can be compared with each other, IDs follow their ascending sorted
    order. When they cannot (e.g. a mix of `str` and `int`), IDs follow the
    order in which the distinct values are returned by `srs.unique()`.

    Args:
        srs: `pd.Series` of hashable objects.

    Returns:
        `pd.Series` where each component is a unique `int` ID of the
        corresponding value from `srs`.
    """
    distinct_values = srs.unique()
    try:
        ordered_values = sorted(distinct_values)
    except TypeError:
        # Hashable values are not necessarily orderable (e.g. `str` mixed with
        # `int`). Any stable order still yields unique IDs, so fall back to
        # the order produced by `unique()` instead of failing.
        ordered_values = list(distinct_values)
    value_to_id = {value: id_ for id_, value in enumerate(ordered_values)}
    # Manual type cast is needed as `apply` may return `pd.DataFrame` in some
    # cases which are not relevant for this function (e.g. when lambda returns
    # collection).
    return t.cast(pd.Series, srs.apply(lambda x: value_to_id[x]))