def encode()

in clearbox/features.py [0:0]


def encode(srs: pd.Series) -> pd.Series:
  """Map every value of a `pd.Series` onto an integer ID.

  Handy when the feature used for grouping in cross validation is stored as
  strings or as any other non-int hashable objects.

  IDs are handed out by enumerating the distinct values of `srs` in sorted
  order, so the values have to be mutually orderable as well as hashable.

  Args:
    srs: `pd.Series` of hashable objects.

  Returns:
    `pd.Series` with the same index as `srs`, where each element is the `int`
    ID assigned to the corresponding value from `srs`. Equal values share the
    same ID.
  """
  distinct_values = sorted(srs.unique())
  id_by_value = dict(zip(distinct_values, range(len(distinct_values))))

  def lookup_id(value):
    # Every element of `srs` is expected to be a key of `id_by_value`, since
    # the mapping was built from the distinct values of `srs` itself.
    return id_by_value[value]

  encoded = srs.apply(lookup_id)
  # `apply` may return a `pd.DataFrame` in some cases (e.g. when the applied
  # callable returns a collection). Those cases are not relevant here, so the
  # result is narrowed to `pd.Series` manually.
  return t.cast(pd.Series, encoded)