community-artifacts/Model-selection/Prediction-metrics-demo-1.ipynb (343 lines of code) (raw):

{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Prediction metrics\n", "\n", "This module provides a set of metrics to evaluate the quality of predictions of a model. A typical function will take a set of \"prediction\" and \"observation\" values and use them to calculate the desired metric, unless noted otherwise." ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The sql extension is already loaded. To reload it, use:\n", " %reload_ext sql\n" ] } ], "source": [ "%load_ext sql" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# %sql postgresql://gpdbchina@10.194.10.68:55000/madlib\n", "%sql postgresql://fmcquillan@localhost:5432/madlib" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "%sql select madlib.version();" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Continuous variables" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "%%sql \n", "DROP TABLE IF EXISTS test_set;\n", "CREATE TABLE test_set(\n", " pred FLOAT8, -- predicted values\n", " obs FLOAT8 -- actual observed values\n", " );\n", "INSERT INTO test_set VALUES\n", " (37.5,53.1), (12.3,34.2), (74.2,65.4), (91.1,82.1);\n", "\n", "SELECT * FROM test_set;\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "%%sql\n", "DROP TABLE IF EXISTS table_out;\n", "SELECT madlib.mean_abs_error( 'test_set', 'table_out', 'pred', 'obs');\n", "SELECT * FROM table_out;" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "%%sql\n", "DROP TABLE IF EXISTS table_out;\n", "SELECT madlib.mean_abs_perc_error( 'test_set', 'table_out', 'pred', 'obs');\n", "SELECT * FROM table_out;" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "%%sql\n", "DROP TABLE IF EXISTS table_out;\n", "SELECT madlib.mean_perc_error( 'test_set', 'table_out', 'pred', 'obs');\n", "SELECT * FROM table_out;" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "%%sql \n", "DROP TABLE IF EXISTS table_out;\n", "SELECT madlib.mean_squared_error( 'test_set', 'table_out', 'pred', 'obs');\n", "SELECT * FROM table_out;" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "%%sql\n", "DROP TABLE IF EXISTS table_out;\n", "SELECT madlib.r2_score( 'test_set', 'table_out', 'pred', 'obs');\n", "SELECT * FROM table_out;" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "%%sql\n", "DROP TABLE IF EXISTS table_out;\n", "SELECT madlib.adjusted_r2_score( 'test_set', 'table_out', 'pred', 'obs', 3, 100);\n", "SELECT * FROM table_out;" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Binary classification" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Create the sample data for binary classifier metrics:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "%%sql\n", "DROP TABLE IF EXISTS test_set;\n", "CREATE TABLE test_set AS\n", " SELECT ((a*8)::integer)/8.0 pred, -- prediction probability TRUE\n", " ((a*0.5+random()*0.5)>0.5) obs -- actual observations\n", " FROM (select random() as a from generate_series(1,100)) x;\n", "SELECT * FROM test_set;" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Run the Binary Classifier metrics function and View the True Positive Rate and the False Positive Rate:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "%%sql\n", "DROP TABLE IF EXISTS table_out;\n", "SELECT madlib.binary_classifier( 'test_set', 'table_out', 'pred', 'obs');\n", "SELECT threshold, tpr, fpr FROM table_out ORDER BY threshold;" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "View all metrics at a given threshold value:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "%%sql\n", "SELECT * FROM table_out WHERE threshold=0.5;" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Run the Area Under ROC curve function:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "%%sql\n", "DROP TABLE IF EXISTS table_out;\n", "SELECT madlib.area_under_roc( 'test_set', 'table_out', 'pred', 'obs');\n", "SELECT * FROM table_out;" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Multi-class classification" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Create the sample data for confusion matrix." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "%%sql\n", "DROP TABLE IF EXISTS test_set;\n", "CREATE TABLE test_set AS\n", " SELECT (x+y)%5+1 AS pred,\n", " (x*y)%5 AS obs\n", " FROM generate_series(1,5) x,\n", " generate_series(1,5) y;\n", "SELECT * FROM test_set;" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "%%sql\n", "DROP TABLE IF EXISTS table_out;\n", "SELECT madlib.confusion_matrix( 'test_set', 'table_out', 'pred', 'obs');\n", "SELECT * FROM table_out ORDER BY class;" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Other examples" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.11" } }, "nbformat": 4, "nbformat_minor": 0 }