notebooks/111_tf_serving_vision.ipynb (1,612 lines of code) (raw):
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "z2bwRLGzGQlk"
},
"source": [
"This notebook shows how to deploy a TensorFlow vision model from 🤗 Transformers with TensorFlow Serving. It uses [this blog post](https://huggingface.co/blog/tf-serving) as a reference."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "pGC5JtQbGktj"
},
"source": [
"## Setup"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "-kgm_ksCPTkJ",
"outputId": "d9de5a1d-066a-4f6e-865d-64a0dfa72357"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[K |████████████████████████████████| 4.4 MB 4.4 MB/s \n",
"\u001b[K |████████████████████████████████| 6.6 MB 23.1 MB/s \n",
"\u001b[K |████████████████████████████████| 596 kB 39.9 MB/s \n",
"\u001b[K |████████████████████████████████| 101 kB 9.9 MB/s \n",
"\u001b[K |████████████████████████████████| 511.7 MB 5.5 kB/s \n",
"\u001b[K |████████████████████████████████| 438 kB 46.6 MB/s \n",
"\u001b[K |████████████████████████████████| 5.8 MB 33.5 MB/s \n",
"\u001b[K |████████████████████████████████| 1.6 MB 41.1 MB/s \n",
"\u001b[?25h"
]
}
],
"source": [
"# Use the %pip magic so the packages are installed into the running kernel's environment.\n",
"%pip install -q transformers\n",
"%pip install -q tensorflow_serving_api"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Wo-b-LuuGnEK"
},
"source": [
"## Imports"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "1cuyr4mcP5Su"
},
"outputs": [],
"source": [
"from transformers import ViTImageProcessor, TFViTForImageClassification\n",
"import tensorflow as tf\n",
"import tempfile\n",
"import requests\n",
"import base64\n",
"import json\n",
"import os"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "bGCF3EyL92cT",
"outputId": "492d4032-58ad-4c01-de73-dfce7f750092"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"4.20.1\n"
]
}
],
"source": [
"import transformers\n",
"\n",
"print(transformers.__version__)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "jfKdDDqCGpHo"
},
"source": [
"## Save the ViT model and investigate its inputs"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 223,
"referenced_widgets": [
"e6ec5d13fc6d4296b8f76bbba303701d",
"1d5c3a22585945a0af67dd2d3b099a1b",
"558c290455454b4f818c15821d3802a6",
"9def5996860e44f99bfc3c0274a8f821",
"b74450ee387e445dbfb832a9d1112a59",
"c2903f0f04b04c8a8312d5f93d36093a",
"036b74dd4d8a4127a52a427916391b3d",
"31fe4b9f063a4b4b94c68852cef75371",
"62b1ff5b24ec4325a054307248c8b9d7",
"4b0c66e6eb4b4513b35f94413bd64f4f",
"367e751d2c484c16a0005b5e9c31f154",
"d853522fb35f4894b935b360a4afb1b0",
"921b59c2982b4a27be0d48934f00d088",
"3b9c5f31e80e4791894e799f56f5e541",
"373d2e80588042ad815df8f928810439",
"968f597834ec4668a0ce90170b9de04e",
"59a97d2f8fa44a00a0d0895956c8e5cb",
"88742141b97b41b291ae2b2694ba8721",
"7ab98f95f0fc4a6f97d1f33a210f3967",
"a05385eb6d104ab68cc605406e44a011",
"736737ab9fb44a579055dcfa0c21cb85",
"47b9562141a74f00a893b2820000350a"
]
},
"id": "eqOIWKJfPnDk",
"outputId": "29ac45f0-82f4-4f66-c268-dbec8fc391fa"
},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "e6ec5d13fc6d4296b8f76bbba303701d",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading: 0%| | 0.00/68.0k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "d853522fb35f4894b935b360a4afb1b0",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading: 0%| | 0.00/330M [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"All model checkpoint layers were used when initializing TFViTForImageClassification.\n",
"\n",
"All the layers of TFViTForImageClassification were initialized from the model checkpoint at google/vit-base-patch16-224.\n",
"If your task is similar to the task the model of the checkpoint was trained on, you can already use TFViTForImageClassification for predictions without further training.\n",
"WARNING:absl:Found untraced functions such as embeddings_layer_call_fn, embeddings_layer_call_and_return_conditional_losses, encoder_layer_call_fn, encoder_layer_call_and_return_conditional_losses, layernorm_layer_call_fn while saving (showing 5 of 421). These functions will not be directly callable after loading.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow:Assets written to: resnet/saved_model/1/assets\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:tensorflow:Assets written to: resnet/saved_model/1/assets\n"
]
}
],
"source": [
"# Passing saved_model=True makes save_pretrained() also export a TensorFlow SavedModel under <temp_model_dir>/saved_model/1.\n",
"temp_model_dir = \"vit\"\n",
"model = TFViTForImageClassification.from_pretrained(\"google/vit-base-patch16-224\")\n",
"model.save_pretrained(temp_model_dir, saved_model=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "fv9VDVBzTzZr",
"outputId": "2e1bbc98-e54f-48a3-903d-3df375782986"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The given SavedModel SignatureDef contains the following input(s):\n",
" inputs['pixel_values'] tensor_info:\n",
" dtype: DT_FLOAT\n",
" shape: (-1, -1, -1, -1)\n",
" name: serving_default_pixel_values:0\n",
"The given SavedModel SignatureDef contains the following output(s):\n",
" outputs['logits'] tensor_info:\n",
" dtype: DT_FLOAT\n",
" shape: (-1, 1000)\n",
" name: StatefulPartitionedCall:0\n",
"Method name is: tensorflow/serving/predict\n"
]
}
],
"source": [
"!saved_model_cli show --dir {temp_model_dir}/saved_model/1 --tag_set serve --signature_def serving_default"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "0567_tMkGvVx"
},
"source": [
"## Save the model with embedded pre-processing and post-processing ops"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "1H-LEF8FP2G8",
"outputId": "41c5c91f-45e3-4fa0-96fe-9e596334593d"
},
"outputs": [
{
"data": {
"text/plain": [
"ViTFeatureExtractor {\n",
" \"do_normalize\": true,\n",
" \"do_resize\": true,\n",
" \"feature_extractor_type\": \"ViTFeatureExtractor\",\n",
" \"image_mean\": [\n",
" 0.5,\n",
" 0.5,\n",
" 0.5\n",
" ],\n",
" \"image_std\": [\n",
" 0.5,\n",
" 0.5,\n",
" 0.5\n",
" ],\n",
" \"resample\": 2,\n",
" \"size\": 224\n",
"}"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"processor = ViTImageProcessor()\n",
"processor"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "RvsoVW_0Qb2k"
},
"outputs": [],
"source": [
"CONCRETE_INPUT = \"pixel_values\"\n",
"SIZE = processor.size[\"height\"]\n",
"INPUT_SHAPE = (SIZE, SIZE, 3)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "2YeDe1M5Q5AP"
},
"outputs": [],
"source": [
"def normalize_img(img, mean=processor.image_mean, std=processor.image_std):\n",
"    \"\"\"Divide `img` by 255, then standardize it with `mean` and `std`.\n",
"\n",
"    `mean` and `std` default to the statistics stored on `processor`.\n",
"    \"\"\"\n",
"    scaled = img / 255\n",
"    return (scaled - tf.constant(mean)) / tf.constant(std)\n",
"\n",
"\n",
"def preprocess(string_input):\n",
"    \"\"\"Decode one base64-encoded JPEG string into a normalized, channel-first tensor.\"\"\"\n",
"    jpeg_bytes = tf.io.decode_base64(string_input)\n",
"    image = tf.io.decode_jpeg(jpeg_bytes, channels=3)\n",
"    image = tf.image.resize(image, size=(SIZE, SIZE))\n",
"    image = normalize_img(image)\n",
"    # Move the channel axis to the front, since HF models are channel-first.\n",
"    return tf.transpose(image, (2, 0, 1))\n",
"\n",
"\n",
"@tf.function(input_signature=[tf.TensorSpec([None], tf.string)])\n",
"def preprocess_fn(string_input):\n",
"    \"\"\"Preprocess a 1-D batch of base64-encoded JPEG strings.\n",
"\n",
"    Applies `preprocess` to every element and returns the stacked result in a\n",
"    dict keyed by `CONCRETE_INPUT`.\n",
"    \"\"\"\n",
"    # `dtype=` and `back_prop=False` are deprecated in `tf.map_fn`; their\n",
"    # replacements are `fn_output_signature=` and `tf.stop_gradient`.\n",
"    decoded_images = tf.stop_gradient(\n",
"        tf.map_fn(preprocess, string_input, fn_output_signature=tf.float32)\n",
"    )\n",
"    return {CONCRETE_INPUT: decoded_images}\n",
"\n",
"\n",
"def model_exporter(model: tf.keras.Model):\n",
"    \"\"\"Build a serving function that wraps `model` with pre- and post-processing.\n",
"\n",
"    The returned `tf.function` takes a 1-D batch of base64-encoded JPEG strings\n",
"    and returns a dict with the predicted `label` and its softmax `confidence`\n",
"    for each input.\n",
"    \"\"\"\n",
"    m_call = tf.function(model.call).get_concrete_function(\n",
"        tf.TensorSpec(\n",
"            shape=[None, 3, SIZE, SIZE], dtype=tf.float32, name=CONCRETE_INPUT\n",
"        )\n",
"    )\n",
"    # Order the label names by class id so that position i holds the label for\n",
"    # logit i, independent of the insertion order of `id2label`.\n",
"    id2label = model.config.id2label\n",
"    label_names = [id2label[class_id] for class_id in sorted(id2label, key=int)]\n",
"\n",
"    @tf.function(input_signature=[tf.TensorSpec([None], tf.string)])\n",
"    def serving_fn(string_input):\n",
"        labels = tf.constant(label_names, dtype=tf.string)\n",
"        images = preprocess_fn(string_input)\n",
"\n",
"        predictions = m_call(**images)\n",
"        logits = predictions.logits\n",
"        indices = tf.argmax(logits, axis=1)\n",
"        pred_source = tf.gather(params=labels, indices=indices)\n",
"        # The confidence is the largest softmax probability for each input.\n",
"        probs = tf.nn.softmax(logits, axis=1)\n",
"        pred_confidence = tf.reduce_max(probs, axis=1)\n",
"        return {\"label\": pred_source, \"confidence\": pred_confidence}\n",
"\n",
"    return serving_fn"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "J9Xdj_OiF6TG"
},
"source": [
"**Notes on making the model accept string inputs**:\n",
"\n",
"When images are sent via REST or gRPC requests, the size of the request payload can easily balloon depending on the resolution of the images being passed. This is why it is good practice to compress them reliably and then prepare the request payload. Here, the serving function accepts base64-encoded JPEG strings and decodes them inside the exported graph."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "7IGRj3kAS7oZ",
"outputId": "fbb4464a-d48a-4006-fdcb-8ee5920fe9c1"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"WARNING:tensorflow:From /usr/local/lib/python3.7/dist-packages/tensorflow/python/autograph/impl/api.py:458: calling map_fn_v2 (from tensorflow.python.ops.map_fn) with back_prop=False is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"back_prop=False is deprecated. Consider using tf.stop_gradient instead.\n",
"Instead of:\n",
"results = tf.map_fn(fn, elems, back_prop=False)\n",
"Use:\n",
"results = tf.nest.map_structure(tf.stop_gradient, tf.map_fn(fn, elems))\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:tensorflow:From /usr/local/lib/python3.7/dist-packages/tensorflow/python/autograph/impl/api.py:458: calling map_fn_v2 (from tensorflow.python.ops.map_fn) with back_prop=False is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"back_prop=False is deprecated. Consider using tf.stop_gradient instead.\n",
"Instead of:\n",
"results = tf.map_fn(fn, elems, back_prop=False)\n",
"Use:\n",
"results = tf.nest.map_structure(tf.stop_gradient, tf.map_fn(fn, elems))\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"WARNING:tensorflow:From /usr/local/lib/python3.7/dist-packages/tensorflow/python/util/deprecation.py:629: calling map_fn_v2 (from tensorflow.python.ops.map_fn) with dtype is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"Use fn_output_signature instead\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:tensorflow:From /usr/local/lib/python3.7/dist-packages/tensorflow/python/util/deprecation.py:629: calling map_fn_v2 (from tensorflow.python.ops.map_fn) with dtype is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"Use fn_output_signature instead\n",
"WARNING:absl:Found untraced functions such as embeddings_layer_call_fn, embeddings_layer_call_and_return_conditional_losses, encoder_layer_call_fn, encoder_layer_call_and_return_conditional_losses, layernorm_layer_call_fn while saving (showing 5 of 421). These functions will not be directly callable after loading.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow:Assets written to: /tmp/1/assets\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:tensorflow:Assets written to: /tmp/1/assets\n"
]
}
],
"source": [
"# Export into a dedicated directory rather than the shared temp root: TF Serving\n",
"# treats numerically named sub-directories of the base path as model versions.\n",
"MODEL_DIR = os.path.join(tempfile.gettempdir(), \"vit_tfserving\")\n",
"VERSION = 1\n",
"\n",
"tf.saved_model.save(\n",
"    model,\n",
"    os.path.join(MODEL_DIR, str(VERSION)),\n",
"    signatures={\"serving_default\": model_exporter(model)},\n",
")\n",
"# Exported so the `%%bash` cell that launches the model server can read it.\n",
"os.environ[\"MODEL_DIR\"] = MODEL_DIR"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "_TlzGe8XG1gx"
},
"source": [
"Investigate the `SavedModel` once again. "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "QXXz4BDkY2A_",
"outputId": "15db1f42-93fe-4e8d-e5ee-ea237164da09"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The given SavedModel SignatureDef contains the following input(s):\n",
" inputs['string_input'] tensor_info:\n",
" dtype: DT_STRING\n",
" shape: (-1)\n",
" name: serving_default_string_input:0\n",
"The given SavedModel SignatureDef contains the following output(s):\n",
" outputs['confidence'] tensor_info:\n",
" dtype: DT_FLOAT\n",
" shape: (-1)\n",
" name: StatefulPartitionedCall:0\n",
" outputs['label'] tensor_info:\n",
" dtype: DT_STRING\n",
" shape: (-1)\n",
" name: StatefulPartitionedCall:1\n",
"Method name is: tensorflow/serving/predict\n"
]
}
],
"source": [
"!saved_model_cli show --dir {MODEL_DIR}/1 --tag_set serve --signature_def serving_default"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "MJU-LJuwG5eC"
},
"source": [
"## Install TensorFlow Model Server"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "XYgQ9ufJZTV1",
"outputId": "0ab1d58e-79f8-4828-8f6c-f471229cd7b1"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"--2022-07-15 04:11:11-- http://storage.googleapis.com/tensorflow-serving-apt/pool/tensorflow-model-server-universal-2.8.0/t/tensorflow-model-server-universal/tensorflow-model-server-universal_2.8.0_all.deb\n",
"Resolving storage.googleapis.com (storage.googleapis.com)... 142.251.8.128, 74.125.23.128, 74.125.203.128, ...\n",
"Connecting to storage.googleapis.com (storage.googleapis.com)|142.251.8.128|:80... connected.\n",
"HTTP request sent, awaiting response... 200 OK\n",
"Length: 335421916 (320M) [application/x-debian-package]\n",
"Saving to: ‘tensorflow-model-server-universal_2.8.0_all.deb’\n",
"\n",
"tensorflow-model-se 100%[===================>] 319.88M 68.7MB/s in 4.7s \n",
"\n",
"2022-07-15 04:11:16 (68.7 MB/s) - ‘tensorflow-model-server-universal_2.8.0_all.deb’ saved [335421916/335421916]\n",
"\n",
"Selecting previously unselected package tensorflow-model-server-universal.\n",
"(Reading database ... 155653 files and directories currently installed.)\n",
"Preparing to unpack tensorflow-model-server-universal_2.8.0_all.deb ...\n",
"Unpacking tensorflow-model-server-universal (2.8.0) ...\n",
"Setting up tensorflow-model-server-universal (2.8.0) ...\n"
]
}
],
"source": [
"# Deviates from the original installation instructions.\n",
"# https://issuemode.com/issues/tensorflow/serving/92945160\n",
"# Fetch over HTTPS: the downloaded package is installed system-wide by dpkg below.\n",
"!wget 'https://storage.googleapis.com/tensorflow-serving-apt/pool/tensorflow-model-server-universal-2.8.0/t/tensorflow-model-server-universal/tensorflow-model-server-universal_2.8.0_all.deb'\n",
"!dpkg -i tensorflow-model-server-universal_2.8.0_all.deb"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "k5m_7XZjG8Qo"
},
"source": [
"## Deploy the model \n",
"\n",
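"TF Serving exposes two APIs: REST and gRPC. The gRPC API listens on port 8500 by default, and the REST API is enabled here on port 8501 via `--rest_api_port`. We will see how to run inference with both. Each has its own pros and cons."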
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "be609R85hiik",
"outputId": "148b3fcb-6243-4d69-e163-3c30c6cbaaa2"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Starting job # 0 in a separate thread.\n"
]
}
],
"source": [
"%%bash --bg \n",
"nohup tensorflow_model_server \\\n",
" --rest_api_port=8501 \\\n",
" --model_name=vit \\\n",
" --model_base_path=$MODEL_DIR >server.log 2>&1\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "1r9O1Q_CjJnI",
"outputId": "40ab765e-6017-436e-e94f-7bff927e6c44"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[warn] getaddrinfo: address family for nodename not supported\n",
"[evhttp_server.cc : 245] NET_LOG: Entering the event loop ...\n"
]
}
],
"source": [
"!cat server.log"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "S1wSLcPwkAuK",
"outputId": "d379d947-e6d6-45af-8ec3-b1c72ec112da"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"node 8 root 21u IPv6 26436 0t0 TCP *:8080 (LISTEN)\n",
"colab-fil 30 root 5u IPv6 26409 0t0 TCP *:3453 (LISTEN)\n",
"colab-fil 30 root 6u IPv4 26410 0t0 TCP *:3453 (LISTEN)\n",
"jupyter-n 43 root 6u IPv4 27130 0t0 TCP 172.28.0.2:9000 (LISTEN)\n",
"python3 60 root 15u IPv4 30327 0t0 TCP 127.0.0.1:46129 (LISTEN)\n",
"python3 60 root 18u IPv4 30331 0t0 TCP 127.0.0.1:58207 (LISTEN)\n",
"python3 60 root 21u IPv4 30335 0t0 TCP 127.0.0.1:44103 (LISTEN)\n",
"python3 60 root 24u IPv4 30339 0t0 TCP 127.0.0.1:53393 (LISTEN)\n",
"python3 60 root 30u IPv4 30345 0t0 TCP 127.0.0.1:46873 (LISTEN)\n",
"python3 60 root 43u IPv4 31046 0t0 TCP 127.0.0.1:59625 (LISTEN)\n",
"python3 80 root 3u IPv4 31602 0t0 TCP 127.0.0.1:20352 (LISTEN)\n",
"python3 80 root 4u IPv4 31603 0t0 TCP 127.0.0.1:34417 (LISTEN)\n",
"python3 80 root 9u IPv4 32828 0t0 TCP 127.0.0.1:36819 (LISTEN)\n",
"tensorflo 259 root 5u IPv4 93837 0t0 TCP *:8500 (LISTEN)\n",
"tensorflo 259 root 12u IPv4 92983 0t0 TCP *:8501 (LISTEN)\n"
]
}
],
"source": [
"!sudo lsof -i -P -n | grep LISTEN"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "aXJLcpoYke-d"
},
"source": [
"## REST API"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "JhG4DRPakf5D",
"outputId": "d23f844d-9da1-4f2f-c5e2-820f23994460"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Downloading data from http://images.cocodataset.org/val2017/000000039769.jpg\n",
"173131/173131 [==============================] - 1s 3us/step\n",
"Data: {\"signature_name\": \"serving_default\", \"instances\": ... TRmYgEHbbrYWv0A6b4o2n1HZgYLq91nP-o7O2pcNa6r__2Q==\"]}\n"
]
}
],
"source": [
"image_path = tf.keras.utils.get_file(\n",
" \"image.jpg\", \"http://images.cocodataset.org/val2017/000000039769.jpg\"\n",
")\n",
"bytes_inputs = tf.io.read_file(image_path)\n",
"b64str = base64.urlsafe_b64encode(bytes_inputs.numpy()).decode(\"utf-8\")\n",
"\n",
"data = json.dumps({\"signature_name\": \"serving_default\", \"instances\": [b64str]})\n",
"print(\"Data: {} ... {}\".format(data[:50], data[len(data) - 52 :]))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "vrqZA1Qylv_D",
"outputId": "7b87bb1d-3f52-4a3b-e0b2-6d564616a0a3"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'predictions': [{'label': 'Egyptian cat', 'confidence': 0.896659195}]}\n"
]
}
],
"source": [
"headers = {\"content-type\": \"application/json\"}\n",
"json_response = requests.post(\n",
" \"http://localhost:8501/v1/models/vit:predict\", data=data, headers=headers\n",
")\n",
"print(json.loads(json_response.text))"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "_-yy3u3kmNQO"
},
"source": [
"## gRPC "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "f7gAQSo6EdKi"
},
"outputs": [],
"source": [
"import grpc\n",
"from tensorflow_serving.apis import predict_pb2\n",
"from tensorflow_serving.apis import prediction_service_pb2_grpc"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "qlArzjKvEZ3N"
},
"outputs": [],
"source": [
"channel = grpc.insecure_channel(\"localhost:8500\")\n",
"stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "VOU72ENGE1Xq",
"outputId": "3362cc36-3e70-4d90-9689-ced9e1dd1187"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Serving function input: string_input\n"
]
}
],
"source": [
"loaded = tf.saved_model.load(f\"{MODEL_DIR}/{VERSION}\")\n",
"serving_input = list(\n",
" loaded.signatures[\"serving_default\"].structured_input_signature[1].keys()\n",
")[0]\n",
"print(\"Serving function input:\", serving_input)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "jBloKw6REfTq"
},
"outputs": [],
"source": [
"request = predict_pb2.PredictRequest()\n",
"request.model_spec.name = \"vit\"\n",
"request.model_spec.signature_name = \"serving_default\"\n",
"request.inputs[serving_input].CopyFrom(tf.make_tensor_proto([b64str]))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "mMRC_qp8FNtT",
"outputId": "a6452d17-07af-46b0-ba14-450690ffa523"
},
"outputs": [
{
"data": {
"text/plain": [
"outputs {\n",
" key: \"confidence\"\n",
" value {\n",
" dtype: DT_FLOAT\n",
" tensor_shape {\n",
" dim {\n",
" size: 1\n",
" }\n",
" }\n",
" float_val: 0.8966591954231262\n",
" }\n",
"}\n",
"outputs {\n",
" key: \"label\"\n",
" value {\n",
" dtype: DT_STRING\n",
" tensor_shape {\n",
" dim {\n",
" size: 1\n",
" }\n",
" }\n",
" string_val: \"Egyptian cat\"\n",
" }\n",
"}\n",
"model_spec {\n",
" name: \"resnet\"\n",
" version {\n",
" value: 1\n",
" }\n",
" signature_name: \"serving_default\"\n",
"}"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"grpc_predictions = stub.Predict(request, 10.0) # 10 secs timeout\n",
"print(grpc_predictions)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "lEpEU508Fj3G",
"outputId": "5d635b49-33ed-449b-fd65-4223274f53f5"
},
"outputs": [
{
"data": {
"text/plain": [
"([b'Egyptian cat'], [0.8966591954231262])"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"grpc_predictions.outputs[\"label\"].string_val, grpc_predictions.outputs[\n",
" \"confidence\"\n",
"].float_val"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "YYIcV9BsHGy-"
},
"source": [
"## Next steps\n",
"\n",
"* Deploy the SavedModel to Vertex AI \n",
"* Deploy with TF Serving + Kubernetes (via GKE)"
]
}
],
"metadata": {
"colab": {
"collapsed_sections": [],
"name": "hf-vision-model-tfserving.ipynb",
"provenance": []
},
"gpuClass": "standard",
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.2"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"036b74dd4d8a4127a52a427916391b3d": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"1d5c3a22585945a0af67dd2d3b099a1b": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_c2903f0f04b04c8a8312d5f93d36093a",
"placeholder": "​",
"style": "IPY_MODEL_036b74dd4d8a4127a52a427916391b3d",
"value": "Downloading: 100%"
}
},
"31fe4b9f063a4b4b94c68852cef75371": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"367e751d2c484c16a0005b5e9c31f154": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"373d2e80588042ad815df8f928810439": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_736737ab9fb44a579055dcfa0c21cb85",
"placeholder": "​",
"style": "IPY_MODEL_47b9562141a74f00a893b2820000350a",
"value": " 330M/330M [00:24<00:00, 15.3MB/s]"
}
},
"3b9c5f31e80e4791894e799f56f5e541": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "FloatProgressModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_7ab98f95f0fc4a6f97d1f33a210f3967",
"max": 346537664,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_a05385eb6d104ab68cc605406e44a011",
"value": 346537664
}
},
"47b9562141a74f00a893b2820000350a": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"4b0c66e6eb4b4513b35f94413bd64f4f": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"558c290455454b4f818c15821d3802a6": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "FloatProgressModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_31fe4b9f063a4b4b94c68852cef75371",
"max": 69665,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_62b1ff5b24ec4325a054307248c8b9d7",
"value": 69665
}
},
"59a97d2f8fa44a00a0d0895956c8e5cb": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"62b1ff5b24ec4325a054307248c8b9d7": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "ProgressStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"736737ab9fb44a579055dcfa0c21cb85": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"7ab98f95f0fc4a6f97d1f33a210f3967": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"88742141b97b41b291ae2b2694ba8721": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"921b59c2982b4a27be0d48934f00d088": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_59a97d2f8fa44a00a0d0895956c8e5cb",
"placeholder": "​",
"style": "IPY_MODEL_88742141b97b41b291ae2b2694ba8721",
"value": "Downloading: 100%"
}
},
"968f597834ec4668a0ce90170b9de04e": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"9def5996860e44f99bfc3c0274a8f821": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_4b0c66e6eb4b4513b35f94413bd64f4f",
"placeholder": "​",
"style": "IPY_MODEL_367e751d2c484c16a0005b5e9c31f154",
"value": " 68.0k/68.0k [00:00<00:00, 180kB/s]"
}
},
"a05385eb6d104ab68cc605406e44a011": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "ProgressStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"b74450ee387e445dbfb832a9d1112a59": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"c2903f0f04b04c8a8312d5f93d36093a": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"d853522fb35f4894b935b360a4afb1b0": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HBoxModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_921b59c2982b4a27be0d48934f00d088",
"IPY_MODEL_3b9c5f31e80e4791894e799f56f5e541",
"IPY_MODEL_373d2e80588042ad815df8f928810439"
],
"layout": "IPY_MODEL_968f597834ec4668a0ce90170b9de04e"
}
},
"e6ec5d13fc6d4296b8f76bbba303701d": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HBoxModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_1d5c3a22585945a0af67dd2d3b099a1b",
"IPY_MODEL_558c290455454b4f818c15821d3802a6",
"IPY_MODEL_9def5996860e44f99bfc3c0274a8f821"
],
"layout": "IPY_MODEL_b74450ee387e445dbfb832a9d1112a59"
}
}
}
}
},
"nbformat": 4,
"nbformat_minor": 0
}