in src/smclarify/bias/metrics/common.py [0:0]
def convert_positive_label_values(series: pd.Series, positive_label_values: List[Union[str, int, float]]) -> List:
    """
    Convert ``positive_label_values`` so that its elements have the same type as the values in ``series``.

    The type of the first element of ``series`` is taken as the target type. If the first element of
    ``positive_label_values`` is already an instance of that type, the list is returned unchanged.

    Example problem when it helps:
        The `label_values_or_threshold` and the actual `label` values do not have the same type.
        This leads to customer facing errors when they pass numerical values to
        `label_values_or_threshold` (for instance `[1, 2, 3]`) but have string values in the label
        column of the dataset (for instance, `['1', '2', '3', '4', '5']`).

    :param series: data for facet/label/predicted_label columns
    :param positive_label_values: list of label values provided by user
    :return: list of label values after the conversion (if any). When either input is empty there is
             nothing to compare against, so ``positive_label_values`` is returned unchanged.
    :raises ValueError: if the values cannot be converted to the type of the series elements
    """

    def _convert(items: List, _type: Callable) -> List:
        """Cast every item with ``_type``; decimal strings that ``int`` rejects are cast to ``float``."""
        try:
            return [_type(item) for item in items]
        except ValueError as e:
            # int('1.0') raises a ValueError
            if "invalid literal for int() with base 10" in str(e):
                try:
                    return [float(item) for item in items]
                except ValueError:
                    # Not numeric at all: fall through to the descriptive error below.
                    pass
            raise ValueError(
                "'label' has not positive elements. Double-check if 'label' and 'positive_label_values' "
                "have correct data-types or values."
            ) from e

    if len(positive_label_values) == 0 or len(series) == 0:
        # No sample to infer a type from, or nothing to convert.
        return positive_label_values

    # Positional access: the index of the series is not guaranteed to contain the label 0.
    sample = series.iloc[0]
    first_value = positive_label_values[0]
    if isinstance(first_value, type(sample)):
        return positive_label_values

    # if the types are different, convert positive_label_values
    converted_values: List[Any]
    # pd.api.types.is_bool recognises numpy.bool_ (what a bool-dtype Series yields) as well as bool.
    if pd.api.types.is_bool(sample) and isinstance(first_value, str) and first_value.isalpha():
        # when values = ['True', 'False'] and series = [False, True, ...]
        # bool('False') is True, so the text has to be parsed rather than cast.
        converted_values = [
            label.lower() == "true" if isinstance(label, str) else bool(label) for label in positive_label_values
        ]
    else:
        # e.g. values = [1, 1.0, 0, 0.0] and series = [False, True, ...] are cast with the series' bool type
        converted_values = _convert(positive_label_values, type(sample))
    logger.warning(
        f"Data type of the elements in `positive_label_values` and in `label` must match. "
        f"Converted positive_label_values from {positive_label_values} to {converted_values}"
    )
    return converted_values