def manova()

in stats/statistical_scoring.py [0:0]


def manova(test_row, data, categorical, *, label_col=10, exog_col=9):
    """Compare Wilks' lambda of each label group with and without the test row.

    The test row is appended to ``data`` so that it is label-encoded and
    standardized together with the reference rows, and is then split off
    again.  The remaining rows are partitioned by ``label_col`` (0 is the
    "good" group, 1 the "bad" group).  For each group a MANOVA is fitted
    twice, once on the group alone and once on the group plus the test row,
    and Wilks' lambda of both fits is reported.

    NOTE(review): ``label_col`` is compared against 0 and 1 *after*
    preprocessing, so it presumably has to be listed in ``categorical``
    (otherwise it is standardized and the comparison no longer matches) --
    confirm against callers.

    Args:
        test_row: Values of the row to score; assigned as one new row of
            ``data``, so it must be compatible with ``data``'s columns.
        data: Reference ``DataFrame``.  Rows containing NaN are dropped.  The
            caller's frame is not modified.
        categorical: Column labels to label-encode.  Every other column is
            standardized to zero mean and unit (population) standard
            deviation.
        label_col: Column holding the group label (default ``10``).
        exog_col: Column passed to MANOVA as ``exog`` (default ``9``); all
            columns except ``label_col`` and ``exog_col`` form ``endog``.

    Returns:
        dict with the keys ``"method"`` (always ``"MANOVA"``), ``"WL_good"``,
        ``"WL_test_good"``, ``"WL_bad"`` and ``"WL_test_bad"``.
    """
    # Function-scope import: the module's import list is not visible from
    # this block, and ``pd.concat`` replaces the removed ``DataFrame.append``.
    import pandas as pd

    # Re-index after dropping NaN rows so that ``len(data)`` is guaranteed to
    # be an unused label; with gaps in the index the assignment below could
    # overwrite an existing row instead of appending the test row.
    data = data.dropna().reset_index(drop=True)
    data.loc[len(data)] = test_row

    encoder = LabelEncoder()
    for col in categorical:
        data[col] = encoder.fit_transform(data[col])

    for col in data.columns:
        if col not in categorical:
            values = data[col]
            data[col] = (values - np.mean(values)) / np.std(values)

    # Split the preprocessed test row off again.  ``DataFrame.drop`` is not
    # in-place, so the result has to be rebound; otherwise the test row would
    # remain in the reference groups.  Slicing with ``[[-1]]`` keeps the row
    # as a one-row frame, preserving per-column dtypes for the concat below.
    test_frame = data.iloc[[-1]]
    data = data.iloc[:-1]

    x_test = test_frame.drop([label_col, exog_col], axis=1)
    y_test = test_frame[[exog_col]]

    scorecard = {"method": "MANOVA"}
    for group, label in (("good", 0), ("bad", 1)):
        subset = data[data[label_col] == label]
        x_group = subset.drop([label_col, exog_col], axis=1)
        y_group = subset[[exog_col]]

        scorecard["WL_" + group] = _wilks_lambda(x_group, y_group)
        scorecard["WL_test_" + group] = _wilks_lambda(
            pd.concat([x_group, x_test]),
            pd.concat([y_group, y_test]),
        )

    return scorecard


def _wilks_lambda(endog, exog):
    """Fit a MANOVA and return Wilks' lambda for the first ``exog`` term."""
    stat = np.array(MANOVA(endog=endog, exog=exog).mv_test()['x0']['stat'])
    # First row of the statistics table is Wilks' lambda (then Pillai's
    # trace, Hotelling-Lawley trace, Roy's greatest root); first column is
    # the statistic's value.
    return stat[0][0]