{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This notebook regroups the code sample of the video below, which is a part of the [Hugging Face course](https://huggingface.co/course)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"cellView": "form"
},
"outputs": [
{
"data": {
"text/html": [
"<iframe width=\"560\" height=\"315\" src=\"https://www.youtube.com/embed/1pedAIvTWXk?rel=0&controls=0&showinfo=0\" frameborder=\"0\" allowfullscreen></iframe>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#@title\n",
"from IPython.display import HTML\n",
"\n",
"HTML('<iframe width=\"560\" height=\"315\" src=\"https://www.youtube.com/embed/1pedAIvTWXk?rel=0&controls=0&showinfo=0\" frameborder=\"0\" allowfullscreen></iframe>')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Install the Transformers and Datasets libraries to run this notebook."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"! pip install datasets transformers[sentencepiece]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[{'label': 'POSITIVE', 'score': 0.9598047137260437},\n",
" {'label': 'NEGATIVE', 'score': 0.9994558095932007}]"
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from transformers import pipeline\n",
"\n",
"classifier = pipeline(\"sentiment-analysis\")\n",
"classifier([\n",
" \"I've been waiting for a HuggingFace course my whole life.\", \n",
" \"I hate this so much!\"\n",
"])"
]
},
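{
"cell_type": "markdown",
"metadata": {},
"source": [
"The pipeline falls back to a default checkpoint for the task (for this course, `distilbert-base-uncased-finetuned-sst-2-english`, the one we take apart below). As a minimal sketch, passing that checkpoint explicitly gives the same classifier:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from transformers import pipeline\n",
"\n",
"# Equivalent to the default pipeline above, with the checkpoint spelled out\n",
"classifier = pipeline(\n",
"    \"sentiment-analysis\", model=\"distilbert-base-uncased-finetuned-sst-2-english\"\n",
")\n",
"classifier([\"I hate this so much!\"])"
]
},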
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'input_ids': tensor([[ 101, 1045, 1005, 2310, 2042, 3403, 2005, 1037, 17662, 12172,\n",
" 2607, 2026, 2878, 2166, 1012, 102],\n",
" [ 101, 1045, 5223, 2023, 2061, 2172, 999, 102, 0, 0,\n",
" 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],\n",
" [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]])}\n"
]
}
],
"source": [
"from transformers import AutoTokenizer\n",
"\n",
"checkpoint = \"distilbert-base-uncased-finetuned-sst-2-english\"\n",
"tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
"\n",
"raw_inputs = [\n",
" \"I've been waiting for a HuggingFace course my whole life.\", \n",
" \"I hate this so much!\",\n",
"]\n",
"inputs = tokenizer(raw_inputs, padding=True, truncation=True, return_tensors=\"pt\")\n",
"print(inputs)"
]
},
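{
"cell_type": "markdown",
"metadata": {},
"source": [
"The tokenizer returns the `input_ids` (one row per sentence, right-padded with id 0 to the same length) and an `attention_mask` whose 0s mark the padding tokens the model should ignore. As a quick sanity check, a minimal sketch mapping the ids of the first sentence back to tokens:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# [CLS] and [SEP] are special tokens the tokenizer adds around each sentence\n",
"print(tokenizer.convert_ids_to_tokens(inputs[\"input_ids\"][0].tolist()))"
]
},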
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Some weights of the model checkpoint at distilbert-base-uncased-finetuned-sst-2-english were not used when initializing DistilBertModel: ['pre_classifier.weight', 'pre_classifier.bias', 'classifier.bias', 'classifier.weight']\n",
"- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
"- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"torch.Size([2, 16, 768])\n"
]
}
],
"source": [
"from transformers import AutoModel\n",
"\n",
"checkpoint = \"distilbert-base-uncased-finetuned-sst-2-english\"\n",
"model = AutoModel.from_pretrained(checkpoint)\n",
"outputs = model(**inputs)\n",
"print(outputs.last_hidden_state.shape)"
]
},
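{
"cell_type": "markdown",
"metadata": {},
"source": [
"The shape `[2, 16, 768]` reads as (batch size, sequence length, hidden size): one 768-dimensional vector per token, for each of our 2 padded sequences of 16 tokens. Since we are only running inference, a minimal sketch of the same forward pass with gradient tracking disabled:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import torch\n",
"\n",
"# No gradients are needed at inference time, so skip tracking them\n",
"with torch.no_grad():\n",
"    outputs = model(**inputs)\n",
"print(outputs.last_hidden_state.shape)"
]
},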
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"tensor([[-1.5607, 1.6123],\n",
" [ 4.1692, -3.3464]], grad_fn=<AddmmBackward>)\n"
]
}
],
"source": [
"from transformers import AutoModelForSequenceClassification\n",
"\n",
"checkpoint = \"distilbert-base-uncased-finetuned-sst-2-english\"\n",
"model = AutoModelForSequenceClassification.from_pretrained(checkpoint)\n",
"outputs = model(**inputs)\n",
"print(outputs.logits)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"tensor([[4.0195e-02, 9.5980e-01],\n",
" [9.9946e-01, 5.4418e-04]], grad_fn=<SoftmaxBackward>)\n"
]
}
],
"source": [
"import torch\n",
"\n",
"predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)\n",
"print(predictions)"
]
},
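{
"cell_type": "markdown",
"metadata": {},
"source": [
"The softmax turns the raw logits into probabilities: every row is now non-negative and sums to 1. A quick check:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Each row of predictions is a probability distribution over the two labels\n",
"print(predictions.sum(dim=-1))"
]
},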
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{0: 'NEGATIVE', 1: 'POSITIVE'}"
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.config.id2label"
]
},
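{
"cell_type": "markdown",
"metadata": {},
"source": [
"With the label mapping from the model config, we can reproduce the pipeline output ourselves; a minimal sketch combining the pieces above:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Pick the most likely class for each sentence and look up its label\n",
"for probs in predictions:\n",
"    pred_id = int(probs.argmax())\n",
"    print({\"label\": model.config.id2label[pred_id], \"score\": float(probs[pred_id])})"
]
},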
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"colab": {
"name": "What happens inside the pipeline function? (PyTorch)",
"provenance": []
}
},
"nbformat": 4,
"nbformat_minor": 4
}