def CI()

in src/smclarify/bias/metrics/pretraining.py


def CI(feature: pd.Series, sensitive_facet_index: pd.Series) -> float:
    r"""
    Class Imbalance (CI)

    :param feature: input feature
    :param sensitive_facet_index: boolean column indicating sensitive group
    :return: a float in the interval [-1, +1] indicating an under-representation or over-representation
        of the sensitive class.

    .. math::
        CI = \frac{n_a - n_d}{n_a + n_d}

    Bias is often generated from an under-representation of
    the sensitive class in the dataset, especially if the desired “golden truth”
    is equality across classes. Imbalance carries over into model predictions.
    We report all measures as differences and normalized differences. Since
    the measures are often probabilities or proportions, the differences lie in
    the interval [-1, +1]. We define CI = (n_a - n_d)/(n_a + n_d), where n_a is the number of
    instances in the non-sensitive group and n_d is the number of instances in the sensitive group.
    """
    require(sensitive_facet_index.dtype == bool, "sensitive_facet_index must be of type bool")
    pos = len(feature[sensitive_facet_index])  # n_d: number of instances in the sensitive group
    neg = len(feature[~sensitive_facet_index])  # n_a: number of instances in the non-sensitive group
    q = pos + neg
    if neg == 0:
        raise ValueError("CI: negated facet set is empty. Check that x[~facet] has non-zero length.")
    if pos == 0:
        raise ValueError("CI: facet set is empty. Check that x[facet] has non-zero length.")
    assert q != 0
    ci = float(neg - pos) / q
    return ci
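
A minimal usage sketch, not part of the source file: assuming the package is installed and the
import path mirrors the file location shown above, CI can be called with a pandas Series and a
boolean mask that marks the sensitive group.

import pandas as pd

from smclarify.bias.metrics.pretraining import CI

feature = pd.Series(["a", "a", "a", "b"])  # 3 instances in the non-sensitive group, 1 in the sensitive group
sensitive_facet_index = feature == "b"  # boolean mask selecting the sensitive group

# CI = (n_a - n_d) / (n_a + n_d) = (3 - 1) / (3 + 1) = 0.5,
# i.e. the sensitive group "b" is under-represented.
print(CI(feature, sensitive_facet_index))  # 0.5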