def eval()

in 07_sparkml/experiment.py [0:0]


def eval(labelpred):
    """Summarize how well thresholded predictions agree with their labels.

    Args:
        labelpred: collection of ``(label, pred)`` pairs, where ``data[0]``
            is the label and ``data[1]`` is the prediction. It must provide
            ``filter``, ``map``, ``count`` and ``sum`` methods
            (presumably a Spark RDD -- confirm against the caller).

    Returns:
        dict with these keys:
            'rmse': root of the mean squared difference between label and
                prediction.
            'total_cancel': number of pairs whose prediction is below 0.7.
            'correct_cancel': fraction of those pairs whose label is 0.
            'total_noncancel': number of pairs whose prediction is at
                least 0.7.
            'correct_noncancel': fraction of those pairs whose label is 1.

        An empty bucket reports a fraction of 0.0, and an empty input
        reports an rmse of 0.0, instead of dividing by zero.
    """
    threshold = 0.7

    cancel = labelpred.filter(lambda data: data[1] < threshold)
    nocancel = labelpred.filter(lambda data: data[1] >= threshold)

    # Every pair in `cancel` has pred < threshold, so its predicted class is
    # int(pred >= threshold) == 0; likewise it is 1 for every pair in
    # `nocancel`. A pair is "correct" when its label matches that class.
    corr_cancel = cancel.filter(lambda data: data[0] == 0).count()
    corr_nocancel = nocancel.filter(lambda data: data[0] == 1).count()

    # Count each bucket exactly once and reuse the result: if labelpred is a
    # Spark RDD, every count() is a separate action over the data.
    total_cancel = cancel.count()
    total_nocancel = nocancel.count()

    # Substitute 1 for a zero denominator so empty buckets (or an entirely
    # empty input) produce 0.0 rather than raising ZeroDivisionError.
    cancel_denom = total_cancel or 1
    nocancel_denom = total_nocancel or 1
    rmse_denom = (total_cancel + total_nocancel) or 1

    totsqe = labelpred.map(
        lambda data: (data[0] - data[1]) * (data[0] - data[1])
    ).sum()
    rmse = np.sqrt(totsqe / float(rmse_denom))

    return {
        'rmse': rmse,
        'total_cancel': total_cancel,
        'correct_cancel': float(corr_cancel) / cancel_denom,
        'total_noncancel': total_nocancel,
        'correct_noncancel': float(corr_nocancel) / nocancel_denom
    }