course/videos/inside_pipeline_pt.ipynb

{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "This notebook regroups the code sample of the video below, which is a part of the [Hugging Face course](https://huggingface.co/course)." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "cellView": "form" }, "outputs": [ { "data": { "text/html": [ "<iframe width=\"560\" height=\"315\" src=\"https://www.youtube.com/embed/1pedAIvTWXk?rel=0&amp;controls=0&amp;showinfo=0\" frameborder=\"0\" allowfullscreen></iframe>" ], "text/plain": [ "<IPython.core.display.HTML object>" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#@title\n", "from IPython.display import HTML\n", "\n", "HTML('<iframe width=\"560\" height=\"315\" src=\"https://www.youtube.com/embed/1pedAIvTWXk?rel=0&amp;controls=0&amp;showinfo=0\" frameborder=\"0\" allowfullscreen></iframe>')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Install the Transformers and Datasets libraries to run this notebook." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "! pip install datasets transformers[sentencepiece]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[{'label': 'POSITIVE', 'score': 0.9598047137260437},\n", " {'label': 'NEGATIVE', 'score': 0.9994558095932007}]" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from transformers import pipeline\n", "\n", "classifier = pipeline(\"sentiment-analysis\")\n", "classifier([\n", " \"I've been waiting for a HuggingFace course my whole life.\", \n", " \"I hate this so much!\"\n", "])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'input_ids': tensor([[ 101, 1045, 1005, 2310, 2042, 3403, 2005, 1037, 17662, 12172,\n", " 2607, 2026, 2878, 2166, 1012, 102],\n", " [ 101, 1045, 5223, 2023, 2061, 2172, 999, 102, 0, 0,\n", " 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],\n", " [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]])}\n" ] } ], "source": [ "from transformers import AutoTokenizer\n", "\n", "checkpoint = \"distilbert-base-uncased-finetuned-sst-2-english\"\n", "tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n", "\n", "raw_inputs = [\n", " \"I've been waiting for a HuggingFace course my whole life.\", \n", " \"I hate this so much!\",\n", "]\n", "inputs = tokenizer(raw_inputs, padding=True, truncation=True, return_tensors=\"pt\")\n", "print(inputs)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Some weights of the model checkpoint at distilbert-base-uncased-finetuned-sst-2-english were not used when initializing DistilBertModel: ['pre_classifier.weight', 'pre_classifier.bias', 'classifier.bias', 'classifier.weight']\n", "- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. 
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "Some weights of the model checkpoint at distilbert-base-uncased-finetuned-sst-2-english were not used when initializing DistilBertModel: ['pre_classifier.weight', 'pre_classifier.bias', 'classifier.bias', 'classifier.weight']\n",
            "- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
            "- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n"
          ]
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "torch.Size([2, 16, 768])\n"
          ]
        }
      ],
      "source": [
        "from transformers import AutoModel\n",
        "\n",
        "checkpoint = \"distilbert-base-uncased-finetuned-sst-2-english\"\n",
        "model = AutoModel.from_pretrained(checkpoint)\n",
        "outputs = model(**inputs)\n",
        "print(outputs.last_hidden_state.shape)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "tensor([[-1.5607, 1.6123],\n",
            "        [ 4.1692, -3.3464]], grad_fn=<AddmmBackward>)\n"
          ]
        }
      ],
      "source": [
        "from transformers import AutoModelForSequenceClassification\n",
        "\n",
        "checkpoint = \"distilbert-base-uncased-finetuned-sst-2-english\"\n",
        "model = AutoModelForSequenceClassification.from_pretrained(checkpoint)\n",
        "outputs = model(**inputs)\n",
        "print(outputs.logits)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "tensor([[4.0195e-02, 9.5980e-01],\n",
            "        [9.9946e-01, 5.4418e-04]], grad_fn=<SoftmaxBackward>)\n"
          ]
        }
      ],
      "source": [
        "import torch\n",
        "\n",
        "predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)\n",
        "print(predictions)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [
        {
          "data": {
            "text/plain": [
              "{0: 'NEGATIVE', 1: 'POSITIVE'}"
            ]
          },
          "execution_count": null,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "model.config.id2label"
      ]
    },
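    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Putting the pieces together (this cell is not part of the video, just a small illustrative sketch): combining the softmax probabilities with `model.config.id2label` should reproduce the output of the `pipeline` call at the top of the notebook."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Illustrative sketch (not in the video): pick the most probable class for\n",
        "# each sentence and map it to its label; this should match what\n",
        "# pipeline(\"sentiment-analysis\") returned at the top of the notebook.\n",
        "for sentence, probs in zip(raw_inputs, predictions):\n",
        "    label_id = int(probs.argmax())\n",
        "    print(f\"{sentence} -> {model.config.id2label[label_id]} ({probs[label_id].item():.4f})\")"
      ]
    }
  ],
  "metadata": {
    "colab": {
      "name": "What happens inside the pipeline function? (PyTorch)",
      "provenance": []
    }
  },
  "nbformat": 4,
  "nbformat_minor": 4
}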