def german_lending_readable_values()

in src/smclarify/util/dataset.py [0:0]


def german_lending_readable_values(df: pd.DataFrame) -> pd.DataFrame:
    """
    Convert Statlog German lending dataset to have human readable values
    https://archive.ics.uci.edu/ml/datasets/statlog+(german+credit+data)

    The ``Class1Good2Bad`` column is replaced by a categorical ``target`` column
    (1 stays 1, 2 becomes 0), appended as the last column. Every coded
    categorical column (values such as ``"A11"``) is replaced by its descriptive
    label and converted to the ``category`` dtype. Columns not listed in the
    mapping below are passed through unchanged.

    The input frame is not modified; a new frame is returned.

    :param df: German lending data containing ``Class1Good2Bad`` and all of the
        coded columns listed in the mapping below.
    :return: a new DataFrame with readable categorical values and a ``target`` column.
    :raises KeyError: if ``Class1Good2Bad`` or one of the coded columns is missing.
    """
    # Column name -> {dataset code: readable label}. Label strings are emitted at
    # runtime and are kept exactly as they have always been, including the
    # "forniture" spelling, because consumers may match on the literal values.
    code_labels = {
        "CheckingAC_Status": {
            "A11": "x < 0 DM",
            "A12": "0 <= x < 200 DM",
            "A13": "x >= 200DM",
            "A14": "no checking account",
        },
        "CreditHistory": {
            "A30": "no credits",
            "A31": "all credits paid",
            "A32": "existing credits paid",
            "A33": "delay",
            "A34": "critical accnt. / other credits",
        },
        "Purpose": {
            "A40": "new car",
            "A41": "used car",
            "A42": "forniture",
            "A43": "radio/tv",
            "A44": "appliances",
            "A45": "repairs",
            "A46": "education",
            "A47": "vacation",
            "A48": "retraining",
            "A49": "business",
            "A410": "others",
        },
        "SavingsAC": {
            "A61": "x < 100 DM",
            "A62": "100 <= x < 500 DM",
            "A63": "500 <= x < 1000 DM",
            "A64": "x >= 1000 DM",
            "A65": "unknown",
        },
        "Employment": {
            "A71": "unemployed",
            "A72": "x < 1 year",
            "A73": "1 <= x < 4 years",
            "A74": "4 <= x < 7 years",
            "A75": "x >= 7 years",
        },
        "SexAndStatus": {
            "A91": "male divorced/separated",
            "A92": "female divorced/separated/married",
            "A93": "male single",
            "A94": "male married/widowed",
            "A95": "female single",
        },
        "OtherDebts": {
            "A101": "none",
            "A102": "co-applicant",
            "A103": "guarantor",
        },
        "Property": {
            "A121": "real estate",
            "A122": "soc. savings / life insurance",
            "A123": "car or other",
            "A124": "unknown",
        },
        "OtherInstalmentPlans": {
            "A141": "bank",
            "A142": "stores",
            "A143": "none",
        },
        "Housing": {
            "A151": "rent",
            "A152": "own",
            "A153": "for free",
        },
        "Job": {
            "A171": "unemployed / unskilled-non-resident",
            "A172": "unskilled-resident",
            "A173": "skilled employee / official",
            "A174": "management / self-employed / highly qualified employee / officer",
        },
        "Telephone": {
            "A191": "none",
            "A192": "yes",
        },
        "ForeignWorker": {
            "A201": "yes",
            "A202": "no",
        },
    }

    # Numeric target: class 1 ("good") stays 1, class 2 ("bad") becomes 0.
    target = df["Class1Good2Bad"].replace([1, 2], [1, 0]).astype("category")

    # drop() returns a new frame, so every assignment below lands on the copy
    # and the caller's DataFrame is left untouched.
    readable = df.drop(columns=["Class1Good2Bad"])
    readable["target"] = target

    for column, labels in code_labels.items():
        codes = list(labels.keys())
        names = list(labels.values())
        readable[column] = readable[column].replace(codes, names).astype("category")

    return readable