{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "Run transformers with TF Serving.ipynb",
"provenance": [],
"collapsed_sections": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"accelerator": "GPU",
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"604ad97f7ca347bbbb24503d8101f2fe": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"state": {
"_view_name": "HBoxView",
"_dom_classes": [],
"_model_name": "HBoxModel",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.5.0",
"box_style": "",
"layout": "IPY_MODEL_0c17bf5056db458780fd46bce91f5813",
"_model_module": "@jupyter-widgets/controls",
"children": [
"IPY_MODEL_1c2d6cb956ab49cca033b20bdaf5ccfe",
"IPY_MODEL_5e86b20ceeb149fdac951eb000d6ffad"
]
}
},
"0c17bf5056db458780fd46bce91f5813": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"1c2d6cb956ab49cca033b20bdaf5ccfe": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"state": {
"_view_name": "ProgressView",
"style": "IPY_MODEL_d667528ddd1b4be9852cabd1b949d93d",
"_dom_classes": [],
"description": "Downloading: 100%",
"_model_name": "FloatProgressModel",
"bar_style": "success",
"max": 642,
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": 642,
"_view_count": null,
"_view_module_version": "1.5.0",
"orientation": "horizontal",
"min": 0,
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_91c07d6f7f7944cdb500905e7fd14fac"
}
},
"5e86b20ceeb149fdac951eb000d6ffad": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_c4f36947667c41ba8488020a56de7f34",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": " 642/642 [00:17<00:00, 35.8B/s]",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_24fc03ccf5dd4767ae6921983471213c"
}
},
"d667528ddd1b4be9852cabd1b949d93d": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"state": {
"_view_name": "StyleView",
"_model_name": "ProgressStyleModel",
"description_width": "initial",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"bar_color": null,
"_model_module": "@jupyter-widgets/controls"
}
},
"91c07d6f7f7944cdb500905e7fd14fac": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"c4f36947667c41ba8488020a56de7f34": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"24fc03ccf5dd4767ae6921983471213c": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"1d0e13b9685f4059a4e76d4a235f36c2": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"state": {
"_view_name": "HBoxView",
"_dom_classes": [],
"_model_name": "HBoxModel",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.5.0",
"box_style": "",
"layout": "IPY_MODEL_ee03458cd2f94484b24782f8949f5b3d",
"_model_module": "@jupyter-widgets/controls",
"children": [
"IPY_MODEL_5abfcae24dcf400ba1939e32273d0ade",
"IPY_MODEL_7035bde5b3174e61904b75fb10597e43"
]
}
},
"ee03458cd2f94484b24782f8949f5b3d": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"5abfcae24dcf400ba1939e32273d0ade": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"state": {
"_view_name": "ProgressView",
"style": "IPY_MODEL_6fb693a89fd941f786972e6f048601cb",
"_dom_classes": [],
"description": "Downloading: 100%",
"_model_name": "FloatProgressModel",
"bar_style": "success",
"max": 438024457,
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": 438024457,
"_view_count": null,
"_view_module_version": "1.5.0",
"orientation": "horizontal",
"min": 0,
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_2d1b167f1b3f42a18cbae16ec44d86c2"
}
},
"7035bde5b3174e61904b75fb10597e43": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_40880b11969d4af09b1a29d3b8ebd334",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": " 438M/438M [00:16<00:00, 25.9MB/s]",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_ce98abb7f651489eb7ba8f6e5f426634"
}
},
"6fb693a89fd941f786972e6f048601cb": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"state": {
"_view_name": "StyleView",
"_model_name": "ProgressStyleModel",
"description_width": "initial",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"bar_color": null,
"_model_module": "@jupyter-widgets/controls"
}
},
"2d1b167f1b3f42a18cbae16ec44d86c2": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"40880b11969d4af09b1a29d3b8ebd334": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"ce98abb7f651489eb7ba8f6e5f426634": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
}
}
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "_dRh9TOQL5bZ"
},
"source": [
"# TensorFlow Serving\n",
"Now that we've seen the dramatic increase in computational performance, let's walk through a step-by-step explanation of how to deploy TFBert with TF Serving.\n",
"\n",
"## What is TensorFlow Serving?\n",
"TensorFlow Serving belongs to the set of tools provided by TensorFlow Extended (TFX) that makes the task of deploying a model to a server easier than ever. TensorFlow Serving provides two APIs, one that can be called upon using HTTP requests and another one using gRPC to run inference on the server.\n",
"\n",
"## How to install TensorFlow Serving?\n",
"There are three ways to install and use TensorFlow Serving, one is through a Docker container, another one through an apt package and a last one with pip. We will use the apt package in this notebook.\n",
"\n",
"## How to create a saved model?\n",
"Saved model is the format expected by TensorFlow serving. In this colab we will\n",
"run a saved model through the apt package of TF Serving. The limitation of this\n",
"approach is that the model will be run only on CPU.\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "3CaUIN8LMotA"
},
"source": [
"# Install the required packages"
]
},
{
"cell_type": "code",
"metadata": {
"id": "su_IYRbUu-cP",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "2c323b5d-b9c4-47fb-dd39-7d1a1d34e0f1"
},
"source": [
"!pip install -Uq grpcio==1.32.0 transformers tensorflow_serving_api"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
" Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
" Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
" Preparing wheel metadata ... \u001b[?25l\u001b[?25hdone\n",
" Building wheel for transformers (PEP 517) ... \u001b[?25l\u001b[?25hdone\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "xkXAdEn-MtwI"
},
"source": [
"# Import the required packages"
]
},
{
"cell_type": "code",
"metadata": {
"id": "_y3Em3Av3qYG"
},
"source": [
"import os\r\n",
"import requests\r\n",
"import tempfile\r\n",
"import json\r\n",
"import numpy as np\r\n",
"import tensorflow as tf\r\n",
"from tensorflow_serving.apis import predict_pb2\r\n",
"from tensorflow_serving.apis import prediction_service_pb2_grpc\r\n",
"import grpc\r\n",
"from transformers import TFBertForSequenceClassification, BertTokenizerFast, BertConfig"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "K4i2mOX8MyVE"
},
"source": [
"# Create a saved model for sequence classification with BERT"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 778,
"referenced_widgets": [
"604ad97f7ca347bbbb24503d8101f2fe",
"0c17bf5056db458780fd46bce91f5813",
"1c2d6cb956ab49cca033b20bdaf5ccfe",
"5e86b20ceeb149fdac951eb000d6ffad",
"d667528ddd1b4be9852cabd1b949d93d",
"91c07d6f7f7944cdb500905e7fd14fac",
"c4f36947667c41ba8488020a56de7f34",
"24fc03ccf5dd4767ae6921983471213c",
"1d0e13b9685f4059a4e76d4a235f36c2",
"ee03458cd2f94484b24782f8949f5b3d",
"5abfcae24dcf400ba1939e32273d0ade",
"7035bde5b3174e61904b75fb10597e43",
"6fb693a89fd941f786972e6f048601cb",
"2d1b167f1b3f42a18cbae16ec44d86c2",
"40880b11969d4af09b1a29d3b8ebd334",
"ce98abb7f651489eb7ba8f6e5f426634"
]
},
"id": "IMLCFxPv4Jpx",
"outputId": "52f42e00-b741-415b-a86e-3632d6e6be8e"
},
"source": [
"MODEL_DIR = tempfile.gettempdir()\r\n",
"model = TFBertForSequenceClassification.from_pretrained(\"nateraw/bert-base-uncased-imdb\", from_pt=True)\r\n",
"# the saved_model parameter is a flag to create a saved model version of the model in same time than the h5 weights\r\n",
"model.save_pretrained(MODEL_DIR, saved_model=True)\r\n",
"os.environ[\"MODEL_DIR\"] = os.path.join(MODEL_DIR, \"saved_model\")"
],
"execution_count": null,
"outputs": [
{
"output_type": "display_data",
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "604ad97f7ca347bbbb24503d8101f2fe",
"version_minor": 0,
"version_major": 2
},
"text/plain": [
"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=642.0, style=ProgressStyle(description_…"
]
},
"metadata": {
"tags": []
}
},
{
"output_type": "stream",
"text": [
"\n"
],
"name": "stdout"
},
{
"output_type": "display_data",
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "1d0e13b9685f4059a4e76d4a235f36c2",
"version_minor": 0,
"version_major": 2
},
"text/plain": [
"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=438024457.0, style=ProgressStyle(descri…"
]
},
"metadata": {
"tags": []
}
},
{
"output_type": "stream",
"text": [
"\n"
],
"name": "stdout"
},
{
"output_type": "stream",
"text": [
"Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertForSequenceClassification: ['bert.embeddings.position_ids']\n",
"- This IS expected if you are initializing TFBertForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).\n",
"- This IS NOT expected if you are initializing TFBertForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).\n",
"All the weights of TFBertForSequenceClassification were initialized from the PyTorch model.\n",
"If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertForSequenceClassification for predictions without further training.\n"
],
"name": "stderr"
},
{
"output_type": "stream",
"text": [
"WARNING:tensorflow:AutoGraph could not transform <bound method Socket.send of <zmq.sugar.socket.Socket object at 0x7fa87cba9660>> and will run it as-is.\n",
"Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.\n",
"Cause: <cyfunction Socket.send at 0x7fa8943f2e58> is not a module, class, method, function, traceback, frame, or code object\n",
"To silence this warning, decorate the function with @tf.autograph.experimental.do_not_convert\n",
"WARNING: AutoGraph could not transform <bound method Socket.send of <zmq.sugar.socket.Socket object at 0x7fa87cba9660>> and will run it as-is.\n",
"Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.\n",
"Cause: <cyfunction Socket.send at 0x7fa8943f2e58> is not a module, class, method, function, traceback, frame, or code object\n",
"To silence this warning, decorate the function with @tf.autograph.experimental.do_not_convert\n"
],
"name": "stdout"
},
{
"output_type": "stream",
"text": [
"The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).\n"
],
"name": "stderr"
},
{
"output_type": "stream",
"text": [
"WARNING:tensorflow:AutoGraph could not transform <function wrap at 0x7fa891d868c8> and will run it as-is.\n",
"Cause: while/else statement not yet supported\n",
"To silence this warning, decorate the function with @tf.autograph.experimental.do_not_convert\n"
],
"name": "stdout"
},
{
"output_type": "stream",
"text": [
"The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.\n"
],
"name": "stderr"
},
{
"output_type": "stream",
"text": [
"WARNING: AutoGraph could not transform <function wrap at 0x7fa891d868c8> and will run it as-is.\n",
"Cause: while/else statement not yet supported\n",
"To silence this warning, decorate the function with @tf.autograph.experimental.do_not_convert\n"
],
"name": "stdout"
},
{
"output_type": "stream",
"text": [
"The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).\n",
"The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.\n",
"The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).\n",
"The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.\n",
"The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).\n",
"The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.\n",
"The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).\n",
"The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.\n",
"The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).\n",
"The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.\n",
"The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).\n",
"The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.\n",
"The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).\n",
"The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.\n",
"WARNING:absl:Found untraced functions such as embeddings_layer_call_and_return_conditional_losses, embeddings_layer_call_fn, encoder_layer_call_and_return_conditional_losses, encoder_layer_call_fn, pooler_layer_call_and_return_conditional_losses while saving (showing 5 of 1065). These functions will not be directly callable after loading.\n",
"WARNING:absl:Found untraced functions such as embeddings_layer_call_and_return_conditional_losses, embeddings_layer_call_fn, encoder_layer_call_and_return_conditional_losses, encoder_layer_call_fn, pooler_layer_call_and_return_conditional_losses while saving (showing 5 of 1065). These functions will not be directly callable after loading.\n"
],
"name": "stderr"
},
{
"output_type": "stream",
"text": [
"INFO:tensorflow:Assets written to: /tmp/saved_model/1/assets\n"
],
"name": "stdout"
},
{
"output_type": "stream",
"text": [
"INFO:tensorflow:Assets written to: /tmp/saved_model/1/assets\n"
],
"name": "stderr"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "UXrHdYcQM5bR"
},
"source": [
"# Check if the saved model is properly formatted"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "fmH2QNts6Q7r",
"outputId": "28683f0f-195c-4341-a23c-bcfd4d5b206a"
},
"source": [
"!saved_model_cli show --dir {MODEL_DIR}/saved_model/1 --tag_set serve --signature_def serving_default"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"The given SavedModel SignatureDef contains the following input(s):\n",
" inputs['attention_mask'] tensor_info:\n",
" dtype: DT_INT32\n",
" shape: (-1, -1)\n",
" name: serving_default_attention_mask:0\n",
" inputs['input_ids'] tensor_info:\n",
" dtype: DT_INT32\n",
" shape: (-1, -1)\n",
" name: serving_default_input_ids:0\n",
" inputs['token_type_ids'] tensor_info:\n",
" dtype: DT_INT32\n",
" shape: (-1, -1)\n",
" name: serving_default_token_type_ids:0\n",
"The given SavedModel SignatureDef contains the following output(s):\n",
" outputs['logits'] tensor_info:\n",
" dtype: DT_FLOAT\n",
" shape: (-1, 2)\n",
" name: StatefulPartitionedCall:0\n",
"Method name is: tensorflow/serving/predict\n"
],
"name": "stdout"
}
]
},
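{
"cell_type": "markdown",
"metadata": {},
"source": [
"The same signature can also be inspected from Python. As an optional sanity check (a sketch, not required for serving), we can load the SavedModel back with `tf.saved_model.load` and print its serving signature:"
]
},
{
"cell_type": "code",
"metadata": {},
"source": [
"# Optional sanity check: load the SavedModel back and inspect its serving signature\n",
"loaded = tf.saved_model.load(os.path.join(MODEL_DIR, \"saved_model\", \"1\"))\n",
"serving_fn = loaded.signatures[\"serving_default\"]\n",
"print(serving_fn.structured_input_signature)\n",
"print(serving_fn.structured_outputs)"
],
"execution_count": null,
"outputs": []
},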
{
"cell_type": "markdown",
"metadata": {
"id": "1K8jajJhNLGF"
},
"source": [
"# Install the APT package for TF Serving"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "m8IDiDWl677i",
"outputId": "f40f3333-43cf-483b-eddc-bc604e91d31f"
},
"source": [
"!echo \"deb http://storage.googleapis.com/tensorflow-serving-apt stable tensorflow-model-server tensorflow-model-server-universal\" | tee /etc/apt/sources.list.d/tensorflow-serving.list && \\\r\n",
"curl https://storage.googleapis.com/tensorflow-serving-apt/tensorflow-serving.release.pub.gpg | apt-key add -\r\n",
"!apt update\r\n",
"!apt-get install tensorflow-model-server"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"deb http://storage.googleapis.com/tensorflow-serving-apt stable tensorflow-model-server tensorflow-model-server-universal\n",
" % Total % Received % Xferd Average Speed Time Time Time Current\n",
" Dload Upload Total Spent Left Speed\n",
"\r 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0\r100 2943 100 2943 0 0 22813 0 --:--:-- --:--:-- --:--:-- 22813\n",
"OK\n",
"Get:1 http://storage.googleapis.com/tensorflow-serving-apt stable InRelease [3,012 B]\n",
"Get:2 https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/ InRelease [3,626 B]\n",
"Ign:3 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64 InRelease\n",
"Ign:4 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 InRelease\n",
"Hit:5 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64 Release\n",
"Get:6 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 Release [564 B]\n",
"Get:7 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 Release.gpg [833 B]\n",
"Get:8 http://ppa.launchpad.net/c2d4u.team/c2d4u4.0+/ubuntu bionic InRelease [15.9 kB]\n",
"Hit:9 http://archive.ubuntu.com/ubuntu bionic InRelease\n",
"Get:10 https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/ Packages [43.2 kB]\n",
"Get:11 http://storage.googleapis.com/tensorflow-serving-apt stable/tensorflow-model-server-universal amd64 Packages [346 B]\n",
"Get:12 http://security.ubuntu.com/ubuntu bionic-security InRelease [88.7 kB]\n",
"Get:13 http://archive.ubuntu.com/ubuntu bionic-updates InRelease [88.7 kB]\n",
"Get:15 http://storage.googleapis.com/tensorflow-serving-apt stable/tensorflow-model-server amd64 Packages [340 B]\n",
"Hit:16 http://ppa.launchpad.net/cran/libgit2/ubuntu bionic InRelease\n",
"Get:17 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 Packages [66.5 kB]\n",
"Get:18 http://archive.ubuntu.com/ubuntu bionic-backports InRelease [74.6 kB]\n",
"Get:19 http://ppa.launchpad.net/graphics-drivers/ppa/ubuntu bionic InRelease [21.3 kB]\n",
"Get:20 http://ppa.launchpad.net/c2d4u.team/c2d4u4.0+/ubuntu bionic/main Sources [1,707 kB]\n",
"Get:21 http://archive.ubuntu.com/ubuntu bionic-updates/universe amd64 Packages [2,140 kB]\n",
"Get:22 http://security.ubuntu.com/ubuntu bionic-security/restricted amd64 Packages [261 kB]\n",
"Get:23 http://security.ubuntu.com/ubuntu bionic-security/main amd64 Packages [1,869 kB]\n",
"Get:24 http://archive.ubuntu.com/ubuntu bionic-updates/restricted amd64 Packages [304 kB]\n",
"Get:25 http://archive.ubuntu.com/ubuntu bionic-updates/main amd64 Packages [2,296 kB]\n",
"Get:26 http://archive.ubuntu.com/ubuntu bionic-updates/multiverse amd64 Packages [45.6 kB]\n",
"Get:27 http://security.ubuntu.com/ubuntu bionic-security/universe amd64 Packages [1,376 kB]\n",
"Get:28 http://ppa.launchpad.net/c2d4u.team/c2d4u4.0+/ubuntu bionic/main amd64 Packages [874 kB]\n",
"Get:29 http://ppa.launchpad.net/graphics-drivers/ppa/ubuntu bionic/main amd64 Packages [49.2 kB]\n",
"Fetched 11.3 MB in 4s (2,854 kB/s)\n",
"Reading package lists... Done\n",
"Building dependency tree \n",
"Reading state information... Done\n",
"39 packages can be upgraded. Run 'apt list --upgradable' to see them.\n",
"Reading package lists... Done\n",
"Building dependency tree \n",
"Reading state information... Done\n",
"The following NEW packages will be installed:\n",
" tensorflow-model-server\n",
"0 upgraded, 1 newly installed, 0 to remove and 39 not upgraded.\n",
"Need to get 223 MB of archives.\n",
"After this operation, 0 B of additional disk space will be used.\n",
"Get:1 http://storage.googleapis.com/tensorflow-serving-apt stable/tensorflow-model-server amd64 tensorflow-model-server all 2.4.0 [223 MB]\n",
"Fetched 223 MB in 3s (77.9 MB/s)\n",
"Selecting previously unselected package tensorflow-model-server.\n",
"(Reading database ... 145483 files and directories currently installed.)\n",
"Preparing to unpack .../tensorflow-model-server_2.4.0_all.deb ...\n",
"Unpacking tensorflow-model-server (2.4.0) ...\n",
"Setting up tensorflow-model-server (2.4.0) ...\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "4hrJcNGYNRXY"
},
"source": [
"# Run a TF Serving server"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "MCFvNb9c7ImM",
"outputId": "1bc3be82-0046-4504-92e2-d9d9766794a8"
},
"source": [
"%%bash --bg \r\n",
"nohup tensorflow_model_server \\\r\n",
" --rest_api_port=8501 \\\r\n",
" --grpc_api_port=8500 \\\r\n",
" --model_name=bert \\\r\n",
" --model_base_path=\"${MODEL_DIR}\" >server.log 2>&1\r\n"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Starting job # 0 in a separate thread.\n"
],
"name": "stdout"
}
]
},
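{
"cell_type": "markdown",
"metadata": {},
"source": [
"The server starts in the background, so it can take a few seconds before it accepts connections. A small polling loop against the model status endpoint of the REST API (a minimal sketch) lets us wait until the model is reachable:"
]
},
{
"cell_type": "code",
"metadata": {},
"source": [
"import time\n",
"\n",
"# Poll the model status endpoint until the server answers (or give up after ~20s)\n",
"for _ in range(20):\n",
"    try:\n",
"        status = requests.get(\"http://localhost:8501/v1/models/bert\")\n",
"        print(status.json())\n",
"        break\n",
"    except requests.exceptions.ConnectionError:\n",
"        time.sleep(1)"
],
"execution_count": null,
"outputs": []
},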
{
"cell_type": "markdown",
"metadata": {
"id": "c4WeE0xrNWy4"
},
"source": [
"# Check if the server runs properly"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "YK963rMS_4sT",
"outputId": "4599a143-0879-4457-b93a-6f069465589e"
},
"source": [
"!tail server.log"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"2021-01-11 11:08:20.232714: I external/org_tensorflow/tensorflow/cc/saved_model/loader.cc:206] Restoring SavedModel bundle.\n",
"2021-01-11 11:08:20.244805: I external/org_tensorflow/tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 2200000000 Hz\n",
"2021-01-11 11:08:20.738030: I external/org_tensorflow/tensorflow/cc/saved_model/loader.cc:190] Running initialization op on SavedModel bundle at path: /tmp/saved_model/1\n",
"2021-01-11 11:08:20.821794: I external/org_tensorflow/tensorflow/cc/saved_model/loader.cc:277] SavedModel load for tags { serve }; Status: success: OK. Took 721751 microseconds.\n",
"2021-01-11 11:08:20.836630: I tensorflow_serving/servables/tensorflow/saved_model_warmup_util.cc:59] No warmup data file found at /tmp/saved_model/1/assets.extra/tf_serving_warmup_requests\n",
"2021-01-11 11:08:20.836767: I tensorflow_serving/core/loader_harness.cc:87] Successfully loaded servable version {name: bert version: 1}\n",
"2021-01-11 11:08:20.838416: I tensorflow_serving/model_servers/server.cc:371] Running gRPC ModelServer at 0.0.0.0:8500 ...\n",
"[warn] getaddrinfo: address family for nodename not supported\n",
"2021-01-11 11:08:20.838977: I tensorflow_serving/model_servers/server.cc:391] Exporting HTTP/REST API at:localhost:8501 ...\n",
"[evhttp_server.cc : 238] NET_LOG: Entering the event loop ...\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "fZUJPhzQNfwS"
},
"source": [
"# Create the requirements for the tests"
]
},
{
"cell_type": "code",
"metadata": {
"id": "kLcgca_cJRcf"
},
"source": [
"sentence = \"I love the new TensorFlow update in transformers.\"\r\n",
"# Load the corresponding tokenizer of our saved model\r\n",
"tokenizer = BertTokenizerFast.from_pretrained(\"nateraw/bert-base-uncased-imdb\")\r\n",
"# Load the model config of our saved model\r\n",
"config = BertConfig.from_pretrained(\"nateraw/bert-base-uncased-imdb\")"
],
"execution_count": null,
"outputs": []
},
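{
"cell_type": "markdown",
"metadata": {},
"source": [
"Optionally, since the `model` object from the earlier cell is still in memory, we can compute the logits locally (a sketch) to compare them against what the server returns below:"
]
},
{
"cell_type": "code",
"metadata": {},
"source": [
"# Optional: run the in-memory model on the same sentence so its logits can be\n",
"# compared with the server responses below\n",
"local_batch = tokenizer(sentence, return_tensors=\"tf\")\n",
"print(model(local_batch).logits.numpy())"
],
"execution_count": null,
"outputs": []
},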
{
"cell_type": "markdown",
"metadata": {
"id": "ctbtVblONm6a"
},
"source": [
"# Run an inference over the REST API"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "iZLv4FCQ7Tzq",
"outputId": "b8f17fa9-281b-4d26-a626-6c335f3fe900"
},
"source": [
"# Tokenize the sentence\r\n",
"batch = tokenizer(sentence)\r\n",
"# Convert the batch into a proper dict\r\n",
"batch = dict(batch)\r\n",
"# Put the example into a list of size 1, that corresponds to the batch size\r\n",
"batch = [batch]\r\n",
"# The REST API needs a JSON that contains the key instances to declare the examples to process\r\n",
"input_data = {\"instances\": batch}\r\n",
"# Query the REST API, the path corresponds to http://host:port/model_version/models_root_folder/model_name:method\r\n",
"r = requests.post(\"http://localhost:8501/v1/models/bert:predict\", data=json.dumps(input_data))\r\n",
"# Parse the JSON result. The results are contained in a list with a root key called \"predictions\"\r\n",
"# and as there is only one example, takes the first element of the list\r\n",
"result = json.loads(r.text)[\"predictions\"][0]\r\n",
"# The returned results are probabilities, that can be positive/negative hence we take their absolute value\r\n",
"abs_scores = np.abs(result)\r\n",
"# Take the argmax that correspond to the index of the max probability.\r\n",
"label_id = np.argmax(abs_scores)\r\n",
"# Print the proper LABEL with its index\r\n",
"print(config.id2label[label_id])"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"POSITIVE\n"
],
"name": "stdout"
}
]
},
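{
"cell_type": "markdown",
"metadata": {},
"source": [
"If probabilities rather than raw logits are needed, a softmax over the returned logits does the job. A minimal sketch:"
]
},
{
"cell_type": "code",
"metadata": {},
"source": [
"# Convert the raw logits into class probabilities with a numerically stable softmax\n",
"logits = np.array(result)\n",
"probs = np.exp(logits - logits.max())\n",
"probs /= probs.sum()\n",
"print({config.id2label[i]: float(p) for i, p in enumerate(probs)})"
],
"execution_count": null,
"outputs": []
},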
{
"cell_type": "markdown",
"metadata": {
"id": "xtTyoQwANrfG"
},
"source": [
"# Run an inference over the gRPC API"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "wJTo40GtImeR",
"outputId": "c20dd617-9e61-42cc-b412-2a40c1c13675"
},
"source": [
"# Tokenize the sentence but this time with TensorFlow tensors as output already batch sized to 1. Ex:\r\n",
"# {\r\n",
"# 'input_ids': <tf.Tensor: shape=(1, 3), dtype=int32, numpy=array([[ 101, 19082, 102]])>,\r\n",
"# 'token_type_ids': <tf.Tensor: shape=(1, 3), dtype=int32, numpy=array([[0, 0, 0]])>,\r\n",
"# 'attention_mask': <tf.Tensor: shape=(1, 3), dtype=int32, numpy=array([[1, 1, 1]])>\r\n",
"# }\r\n",
"batch = tokenizer(sentence, return_tensors=\"tf\")\r\n",
"# Create a channel that will be connected to the gRPC port of the container\r\n",
"channel = grpc.insecure_channel(\"localhost:8500\")\r\n",
"# Create a stub made for prediction. This stub will be used to send the gRPC request to the TF Server.\r\n",
"stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)\r\n",
"# Create a gRPC request made for prediction\r\n",
"request = predict_pb2.PredictRequest()\r\n",
"# Set the name of the model, for this use case it is bert\r\n",
"request.model_spec.name = \"bert\"\r\n",
"# Set which signature is used to format the gRPC query, here the default one\r\n",
"request.model_spec.signature_name = \"serving_default\"\r\n",
"# Set the input_ids input from the input_ids given by the tokenizer\r\n",
"# tf.make_tensor_proto turns a TensorFlow tensor into a Protobuf tensor\r\n",
"request.inputs[\"input_ids\"].CopyFrom(tf.make_tensor_proto(batch[\"input_ids\"]))\r\n",
"# Same with attention mask\r\n",
"request.inputs[\"attention_mask\"].CopyFrom(tf.make_tensor_proto(batch[\"attention_mask\"]))\r\n",
"# Same with token type ids\r\n",
"request.inputs[\"token_type_ids\"].CopyFrom(tf.make_tensor_proto(batch[\"token_type_ids\"]))\r\n",
"# Send the gRPC request to the TF Server\r\n",
"result = stub.Predict(request)\r\n",
"\r\n",
"# The output is a protobuf where the only one output is a list of probabilities\r\n",
"# assigned to the key logits. As the probabilities as in float, the list is\r\n",
"# converted into a numpy array of floats with .float_val\r\n",
"output = result.outputs[\"logits\"].float_val\r\n",
"# Print the proper LABEL with its index\r\n",
"print(config.id2label[np.argmax(np.abs(output))])"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"POSITIVE\n"
],
"name": "stdout"
}
]
}
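,
{
"cell_type": "markdown",
"metadata": {},
"source": [
"`.float_val` flattens the returned tensor. When the original shape matters (for example with a batch size larger than 1), `tf.make_ndarray` rebuilds a shaped NumPy array from the TensorProto. A short sketch:"
]
},
{
"cell_type": "code",
"metadata": {},
"source": [
"# Rebuild a shaped numpy array from the returned TensorProto instead of the\n",
"# flat float_val list; useful when the batch size is larger than 1\n",
"logits = tf.make_ndarray(result.outputs[\"logits\"])\n",
"print(logits.shape)\n",
"print(config.id2label[int(np.argmax(logits[0]))])"
],
"execution_count": null,
"outputs": []
}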
]
}