def eval_model()

in 07_sparkml/logistic.py [0:0]


def eval_model(labelpred, threshold=0.7):
    """Summarize how well thresholded predictions agree with the labels.

    Records are split into two buckets by comparing the prediction with
    ``threshold``: "cancel" (``pred < threshold``) and "noncancel"
    (``pred >= threshold``).  A "cancel" record counts as correct when its
    label equals 0, a "noncancel" record when its label equals 1.

    Args:
        labelpred: Collection of ``(label, pred)`` pairs (``data[0]`` is the
            label, ``data[1]`` the prediction) exposing ``filter(func)`` and
            ``count()`` -- presumably a Spark RDD; confirm against the caller.
        threshold: Decision boundary applied to ``pred``.  Defaults to 0.7,
            the value that used to be hard-coded.

    Returns:
        dict with four entries:
            'total_cancel': number of records with ``pred < threshold``.
            'correct_cancel': fraction of those records whose label is 0.
            'total_noncancel': number of records with ``pred >= threshold``.
            'correct_noncancel': fraction of those records whose label is 1.
        A fraction is reported as 0.0 when its bucket is empty.
    """
    cancel = labelpred.filter(lambda data: data[1] < threshold)
    nocancel = labelpred.filter(lambda data: data[1] >= threshold)

    # Take each count exactly once and reuse it: on a lazily evaluated
    # collection every count() call recomputes the filter from scratch.
    total_cancel = cancel.count()
    total_nocancel = nocancel.count()

    # Inside a bucket the predicted class is constant (0 below the threshold,
    # 1 at or above it), so correctness is a plain label comparison.
    corr_cancel = cancel.filter(lambda data: data[0] == 0).count()
    corr_nocancel = nocancel.filter(lambda data: data[0] == 1).count()

    # max(..., 1) guards the division for an empty bucket; the numerator is
    # 0 in that case, so the reported fraction is 0.0.
    return {
        'total_cancel': total_cancel,
        'correct_cancel': float(corr_cancel) / max(total_cancel, 1),
        'total_noncancel': total_nocancel,
        'correct_noncancel': float(corr_nocancel) / max(total_nocancel, 1),
    }