example/recommenders/demo2-binary.ipynb (137 lines of code) (raw):

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Binary Predictions with Negative Sampling\n",
    "Example of a recommender system making binary predictions instead of predicting a rating.\n",
    "Demonstrates wrapping an existing data iterator in `NegativeSamplingDataIter` and training on the result with `CosineLoss`.\n",
    "See [BlackOut by Shihao Ji et al.](https://arxiv.org/abs/1511.06909) for more on negative sampling.\n",
    "\n",
    "You need to have the Python packages pandas and bokeh installed (`pip install pandas bokeh`)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import mxnet as mx\n",
    "from movielens_data import get_data_iter, max_id\n",
    "from matrix_fact import train\n",
    "import recotools"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# If MXNet is not compiled with GPU support (e.g. on OSX), set to [mx.cpu(0)]\n",
    "# Can be changed to [mx.gpu(0), mx.gpu(1), ..., mx.gpu(N-1)] if there are N GPUs\n",
    "ctx = [mx.gpu(0)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "pos_train_data, pos_test_data = get_data_iter(batch_size=100)\n",
    "max_user, max_item = max_id('./ml-100k/u.data')\n",
    "# Displayed as the cell output; these two values size the embedding tables in plain_net below.\n",
    "(max_user, max_item)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# One shared set of sampling settings so the train and test iterators cannot drift apart.\n",
    "# NOTE(review): the labels are presumably CosineLoss targets (0 = observed pair,\n",
    "# 1 = sampled negative) -- confirm against recotools.\n",
    "neg_sampling_args = dict(sample_ratio=3, positive_label=0, negative_label=1)\n",
    "train_data = recotools.NegativeSamplingDataIter(pos_train_data, **neg_sampling_args)\n",
    "test_data = recotools.NegativeSamplingDataIter(pos_test_data, **neg_sampling_args)\n",
    "train_test_data = (train_data, test_data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def plain_net(k):\n",
    "    \"\"\"Build the factorization network: two embedding lookups joined by a cosine loss.\n",
    "\n",
    "    The 'user' and 'item' inputs are each mapped to a k-dimensional embedding,\n",
    "    and both embeddings are passed to recotools.CosineLoss together with the\n",
    "    'score' label.\n",
    "\n",
    "    Reads the notebook-level max_user and max_item (set in the data-loading\n",
    "    cell) as the embedding table sizes, so that cell must be run first.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    k : int\n",
    "        Output dimension of both embedding layers.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    The symbol produced by recotools.CosineLoss.\n",
    "    \"\"\"\n",
    "    # inputs\n",
    "    user = mx.symbol.Variable('user')\n",
    "    item = mx.symbol.Variable('item')\n",
    "    label = mx.symbol.Variable('score')\n",
    "    # user feature lookup\n",
    "    user_embed = mx.symbol.Embedding(data=user, input_dim=max_user, output_dim=k)\n",
    "    # item feature lookup\n",
    "    item_embed = mx.symbol.Embedding(data=item, input_dim=max_item, output_dim=k)\n",
    "    # loss layer\n",
    "    return recotools.CosineLoss(a=user_embed, b=item_embed, label=label)\n",
    "\n",
    "net1 = plain_net(64)\n",
    "mx.viz.plot_network(net1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "results1 = train(net1, train_test_data, num_epoch=20, learning_rate=0.02, ctx=ctx)"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [Root]",
   "language": "python",
   "name": "Python [Root]"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}