community-artifacts/Supervised-learning/Logistic-regression-v1.ipynb (892 lines of code) (raw):
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Logistic regression"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/fmcquillan/anaconda/lib/python2.7/site-packages/IPython/config.py:13: ShimWarning: The `IPython.config` package has been deprecated. You should import from traitlets.config instead.\n",
" \"You should import from traitlets.config instead.\", ShimWarning)\n",
"/Users/fmcquillan/anaconda/lib/python2.7/site-packages/IPython/utils/traitlets.py:5: UserWarning: IPython.utils.traitlets has moved to a top-level traitlets package.\n",
" warn(\"IPython.utils.traitlets has moved to a top-level traitlets package.\")\n"
]
}
],
"source": [
"%load_ext sql"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"u'Connected: fmcquillan@madlib'"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Greenplum 4.3.10.0\n",
"# %sql postgresql://gpdbchina@10.194.10.68:61000/madlib\n",
" \n",
"# PostgreSQL local\n",
"%sql postgresql://fmcquillan@localhost:5432/madlib"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1 rows affected.\n"
]
},
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <th>version</th>\n",
" </tr>\n",
" <tr>\n",
" <td>MADlib version: 1.12, git revision: unknown, cmake configuration time: Wed Aug 23 23:07:18 UTC 2017, build type: Release, build system: Darwin-16.7.0, C compiler: Clang, C++ compiler: Clang</td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"[(u'MADlib version: 1.12, git revision: unknown, cmake configuration time: Wed Aug 23 23:07:18 UTC 2017, build type: Release, build system: Darwin-16.7.0, C compiler: Clang, C++ compiler: Clang',)]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%sql select madlib.version();\n",
"#%sql select version();"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 1. Load data\n",
"This data set is related to predicting a second heart attack given treatment and health factors."
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Done.\n",
"Done.\n",
"20 rows affected.\n",
"20 rows affected.\n"
]
},
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <th>id</th>\n",
" <th>second_attack</th>\n",
" <th>treatment</th>\n",
" <th>trait_anxiety</th>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>70</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>80</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>50</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>60</td>\n",
" </tr>\n",
" <tr>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>40</td>\n",
" </tr>\n",
" <tr>\n",
" <td>6</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>65</td>\n",
" </tr>\n",
" <tr>\n",
" <td>7</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>75</td>\n",
" </tr>\n",
" <tr>\n",
" <td>8</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>80</td>\n",
" </tr>\n",
" <tr>\n",
" <td>9</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>70</td>\n",
" </tr>\n",
" <tr>\n",
" <td>10</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>60</td>\n",
" </tr>\n",
" <tr>\n",
" <td>11</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>65</td>\n",
" </tr>\n",
" <tr>\n",
" <td>12</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>50</td>\n",
" </tr>\n",
" <tr>\n",
" <td>13</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>45</td>\n",
" </tr>\n",
" <tr>\n",
" <td>14</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>35</td>\n",
" </tr>\n",
" <tr>\n",
" <td>15</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>40</td>\n",
" </tr>\n",
" <tr>\n",
" <td>16</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>50</td>\n",
" </tr>\n",
" <tr>\n",
" <td>17</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>55</td>\n",
" </tr>\n",
" <tr>\n",
" <td>18</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>45</td>\n",
" </tr>\n",
" <tr>\n",
" <td>19</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>50</td>\n",
" </tr>\n",
" <tr>\n",
" <td>20</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>60</td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"[(1, 1, 1, 70),\n",
" (2, 1, 1, 80),\n",
" (3, 1, 1, 50),\n",
" (4, 1, 0, 60),\n",
" (5, 1, 0, 40),\n",
" (6, 1, 0, 65),\n",
" (7, 1, 0, 75),\n",
" (8, 1, 0, 80),\n",
" (9, 1, 0, 70),\n",
" (10, 1, 0, 60),\n",
" (11, 0, 1, 65),\n",
" (12, 0, 1, 50),\n",
" (13, 0, 1, 45),\n",
" (14, 0, 1, 35),\n",
" (15, 0, 1, 40),\n",
" (16, 0, 1, 50),\n",
" (17, 0, 0, 55),\n",
" (18, 0, 0, 45),\n",
" (19, 0, 0, 50),\n",
" (20, 0, 0, 60)]"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"DROP TABLE IF EXISTS patients;\n",
"\n",
"CREATE TABLE patients( id INTEGER NOT NULL,\n",
" second_attack INTEGER,\n",
" treatment INTEGER,\n",
" trait_anxiety INTEGER);\n",
" \n",
"INSERT INTO patients VALUES \n",
"(1, 1, 1, 70),\n",
"(2, 1, 1, 80),\n",
"(3, 1, 1, 50),\n",
"(4, 1, 0, 60),\n",
"(5, 1, 0, 40),\n",
"(6, 1, 0, 65),\n",
"(7, 1, 0, 75),\n",
"(8, 1, 0, 80),\n",
"(9, 1, 0, 70),\n",
"(10, 1, 0, 60),\n",
"(11, 0, 1, 65),\n",
"(12, 0, 1, 50),\n",
"(13, 0, 1, 45),\n",
"(14, 0, 1, 35),\n",
"(15, 0, 1, 40),\n",
"(16, 0, 1, 50),\n",
"(17, 0, 0, 55),\n",
"(18, 0, 0, 45),\n",
"(19, 0, 0, 50),\n",
"(20, 0, 0, 60);\n",
"\n",
"SELECT * FROM patients ORDER BY id;"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 2. Train regression model"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Done.\n",
"1 rows affected.\n",
"1 rows affected.\n"
]
},
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <th>method</th>\n",
" <th>source_table</th>\n",
" <th>out_table</th>\n",
" <th>dependent_varname</th>\n",
" <th>independent_varname</th>\n",
" <th>optimizer_params</th>\n",
" <th>num_all_groups</th>\n",
" <th>num_failed_groups</th>\n",
" <th>num_rows_processed</th>\n",
" <th>num_missing_rows_skipped</th>\n",
" <th>grouping_col</th>\n",
" </tr>\n",
" <tr>\n",
" <td>logregr</td>\n",
" <td>patients</td>\n",
" <td>patients_logregr</td>\n",
" <td>second_attack</td>\n",
" <td>ARRAY[1, treatment, trait_anxiety]</td>\n",
" <td>optimizer=irls, max_iter=20, tolerance=0.0001</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>20</td>\n",
" <td>0</td>\n",
" <td>None</td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"[(u'logregr', u'patients', u'patients_logregr', u'second_attack', u'ARRAY[1, treatment, trait_anxiety]', u'optimizer=irls, max_iter=20, tolerance=0.0001', 1, 0, 20, 0, None)]"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"DROP TABLE IF EXISTS patients_logregr, patients_logregr_summary;\n",
"\n",
"SELECT madlib.logregr_train( 'patients', -- Source table\n",
" 'patients_logregr', -- Output table\n",
" 'second_attack', -- Dependent variable\n",
" 'ARRAY[1, treatment, trait_anxiety]', -- Feature vector\n",
" NULL, -- Grouping\n",
" 20, -- Max iterations\n",
" 'irls' -- Optimizer to use\n",
" );\n",
"\n",
"SELECT * FROM patients_logregr_summary;"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 3. View regression results"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1 rows affected.\n"
]
},
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <th>coef</th>\n",
" <th>log_likelihood</th>\n",
" <th>std_err</th>\n",
" <th>z_stats</th>\n",
" <th>p_values</th>\n",
" <th>odds_ratios</th>\n",
" <th>condition_no</th>\n",
" <th>num_rows_processed</th>\n",
" <th>num_missing_rows_skipped</th>\n",
" <th>num_iterations</th>\n",
" <th>variance_covariance</th>\n",
" </tr>\n",
" <tr>\n",
" <td>[-6.36346994178192, -1.02410605239327, 0.119044916668607]</td>\n",
" <td>-9.41018298389</td>\n",
" <td>[3.21389766375099, 1.17107844860319, 0.0549790458269317]</td>\n",
" <td>[-1.97998524145757, -0.874498248699539, 2.16527796868916]</td>\n",
" <td>[0.0477051870698145, 0.381846973530455, 0.0303664045046183]</td>\n",
" <td>[0.00172337630923221, 0.359117354054956, 1.12642051220895]</td>\n",
" <td>326.081922792</td>\n",
" <td>20</td>\n",
" <td>0</td>\n",
" <td>5</td>\n",
" <td>[[10.329138193064, -0.474304665195738, -0.171995901260057], [-0.474304665195738, 1.37142473278286, -0.00119520703381591], [-0.171995901260057, -0.00119520703381591, 0.00302269548003986]]</td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"[([-6.36346994178192, -1.02410605239327, 0.119044916668607], -9.41018298388876, [3.21389766375099, 1.17107844860319, 0.0549790458269317], [-1.97998524145757, -0.874498248699539, 2.16527796868916], [0.0477051870698145, 0.381846973530455, 0.0303664045046183], [0.00172337630923221, 0.359117354054956, 1.12642051220895], 326.081922791575, 20L, 0L, 5, [[10.329138193064, -0.474304665195738, -0.171995901260057], [-0.474304665195738, 1.37142473278286, -0.00119520703381591], [-0.171995901260057, -0.00119520703381591, 0.00302269548003986]])]"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"SELECT * from patients_logregr;"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 4. Output formatting\n",
"Alternatively, unnest the arrays in the results for easier reading of output:"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"3 rows affected.\n"
]
},
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <th>attribute</th>\n",
" <th>coefficient</th>\n",
" <th>standard_error</th>\n",
" <th>z_stat</th>\n",
" <th>pvalue</th>\n",
" <th>odds_ratio</th>\n",
" </tr>\n",
" <tr>\n",
" <td>intercept</td>\n",
" <td>-6.36346994178</td>\n",
" <td>3.21389766375</td>\n",
" <td>-1.97998524146</td>\n",
" <td>0.0477051870698</td>\n",
" <td>0.00172337630923</td>\n",
" </tr>\n",
" <tr>\n",
" <td>treatment</td>\n",
" <td>-1.02410605239</td>\n",
" <td>1.1710784486</td>\n",
" <td>-0.8744982487</td>\n",
" <td>0.38184697353</td>\n",
" <td>0.359117354055</td>\n",
" </tr>\n",
" <tr>\n",
" <td>trait_anxiety</td>\n",
" <td>0.119044916669</td>\n",
" <td>0.0549790458269</td>\n",
" <td>2.16527796869</td>\n",
" <td>0.0303664045046</td>\n",
" <td>1.12642051221</td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"[(u'intercept', -6.36346994178192, 3.21389766375099, -1.97998524145757, 0.0477051870698145, 0.00172337630923221),\n",
" (u'treatment', -1.02410605239327, 1.17107844860319, -0.874498248699539, 0.381846973530455, 0.359117354054956),\n",
" (u'trait_anxiety', 0.119044916668607, 0.0549790458269317, 2.16527796868916, 0.0303664045046183, 1.12642051220895)]"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"SELECT unnest(array['intercept', 'treatment', 'trait_anxiety']) as attribute,\n",
" unnest(coef) as coefficient,\n",
" unnest(std_err) as standard_error,\n",
" unnest(z_stats) as z_stat,\n",
" unnest(p_values) as pvalue,\n",
" unnest(odds_ratios) as odds_ratio\n",
" FROM patients_logregr;"
]
},
{
"cell_type": "markdown",
"metadata": {
"scrolled": true
},
"source": [
"# 5. Predict outcome\n",
"This example uses the original data table to perform the prediction. Typically a different test dataset with the same features as the original training dataset would be used for prediction."
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"20 rows affected.\n"
]
},
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <th>id</th>\n",
" <th>logregr_predict</th>\n",
" <th>second_attack</th>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <td>5</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <td>6</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <td>7</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <td>8</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <td>9</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <td>10</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <td>11</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <td>12</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <td>13</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <td>15</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <td>16</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <td>17</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <td>18</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <td>19</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <td>20</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"[(1, True, True),\n",
" (2, True, True),\n",
" (3, False, True),\n",
" (4, True, True),\n",
" (5, False, True),\n",
" (6, True, True),\n",
" (7, True, True),\n",
" (8, True, True),\n",
" (9, True, True),\n",
" (10, True, True),\n",
" (11, True, False),\n",
" (12, False, False),\n",
" (13, False, False),\n",
" (14, False, False),\n",
" (15, False, False),\n",
" (16, False, False),\n",
" (17, True, False),\n",
" (18, False, False),\n",
" (19, False, False),\n",
" (20, True, False)]"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"-- Display prediction value along with the original value\n",
"SELECT p.id, madlib.logregr_predict(coef, ARRAY[1, treatment, trait_anxiety]),\n",
" p.second_attack::BOOLEAN\n",
"FROM patients p, patients_logregr m\n",
"ORDER BY p.id;"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 6. Predict probability\n",
"Predicting the probability of the dependent variable being TRUE."
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"20 rows affected.\n"
]
},
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <th>id</th>\n",
" <th>logregr_predict_prob</th>\n",
" <th>second_attack</th>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>0.720223028942</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>0.894354902502</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>0.192269541755</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>0.685513072239</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <td>5</td>\n",
" <td>0.167747881509</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <td>6</td>\n",
" <td>0.798098108915</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <td>7</td>\n",
" <td>0.928568075753</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <td>8</td>\n",
" <td>0.959305763694</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <td>9</td>\n",
" <td>0.877576117431</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <td>10</td>\n",
" <td>0.685513072239</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <td>11</td>\n",
" <td>0.586700895943</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <td>12</td>\n",
" <td>0.192269541755</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <td>13</td>\n",
" <td>0.116032010633</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <td>14</td>\n",
" <td>0.0383829143135</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <td>15</td>\n",
" <td>0.0674976224148</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <td>16</td>\n",
" <td>0.192269541755</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <td>17</td>\n",
" <td>0.545870774303</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <td>18</td>\n",
" <td>0.267675422387</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <td>19</td>\n",
" <td>0.398618639285</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <td>20</td>\n",
" <td>0.685513072239</td>\n",
" <td>False</td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"[(1, 0.720223028941527, True),\n",
" (2, 0.894354902502048, True),\n",
" (3, 0.192269541755171, True),\n",
" (4, 0.685513072239347, True),\n",
" (5, 0.167747881508857, True),\n",
" (6, 0.79809810891514, True),\n",
" (7, 0.928568075752503, True),\n",
" (8, 0.959305763693571, True),\n",
" (9, 0.877576117431452, True),\n",
" (10, 0.685513072239347, True),\n",
" (11, 0.586700895943317, False),\n",
" (12, 0.192269541755171, False),\n",
" (13, 0.116032010632994, False),\n",
" (14, 0.0383829143134982, False),\n",
" (15, 0.0674976224147597, False),\n",
" (16, 0.192269541755171, False),\n",
" (17, 0.545870774302621, False),\n",
" (18, 0.267675422387132, False),\n",
" (19, 0.398618639285111, False),\n",
" (20, 0.685513072239347, False)]"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"-- Display prediction value along with the original value\n",
"SELECT p.id, madlib.logregr_predict_prob(coef, ARRAY[1, treatment, trait_anxiety]),\n",
" p.second_attack::BOOLEAN\n",
"FROM patients p, patients_logregr m\n",
"ORDER BY p.id;"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.12"
}
},
"nbformat": 4,
"nbformat_minor": 1
}