course/videos/trainer_api.ipynb (387 lines of code) (raw):

{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "This notebook regroups the code sample of the video below, which is a part of the [Hugging Face course](https://huggingface.co/course)." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "cellView": "form" }, "outputs": [ { "data": { "text/html": [ "<iframe width=\"560\" height=\"315\" src=\"https://www.youtube.com/embed/nvBXf7s7vTI?rel=0&amp;controls=0&amp;showinfo=0\" frameborder=\"0\" allowfullscreen></iframe>" ], "text/plain": [ "<IPython.core.display.HTML object>" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#@title\n", "from IPython.display import HTML\n", "\n", "HTML('<iframe width=\"560\" height=\"315\" src=\"https://www.youtube.com/embed/nvBXf7s7vTI?rel=0&amp;controls=0&amp;showinfo=0\" frameborder=\"0\" allowfullscreen></iframe>')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Install the Transformers and Datasets libraries to run this notebook." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "! pip install datasets transformers[sentencepiece]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Reusing dataset glue (/home/sgugger/.cache/huggingface/datasets/glue/mrpc/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)\n", "Loading cached processed dataset at /home/sgugger/.cache/huggingface/datasets/glue/mrpc/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad/cache-8174fd92eed0af98.arrow\n", "Loading cached processed dataset at /home/sgugger/.cache/huggingface/datasets/glue/mrpc/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad/cache-8c99fb059544bc96.arrow\n", "Loading cached processed dataset at /home/sgugger/.cache/huggingface/datasets/glue/mrpc/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad/cache-e625eb72bcf1ae1f.arrow\n" ] } ], "source": [ "from datasets import load_dataset\n", "from transformers import AutoTokenizer, DataCollatorWithPadding\n", "\n", "raw_datasets = load_dataset(\"glue\", \"mrpc\")\n", "checkpoint = \"bert-base-cased\"\n", "tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n", "\n", "def tokenize_function(examples):\n", " return tokenizer(examples[\"sentence1\"], examples[\"sentence2\"], truncation=True)\n", "\n", "tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)\n", "data_collator = DataCollatorWithPadding(tokenizer)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight']\n", "- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", "- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" ] } ], "source": [ "from transformers import AutoModelForSequenceClassification\n", "\n", "model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from transformers import TrainingArguments\n", "\n", "training_args = TrainingArguments(\"test-trainer\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from transformers import TrainingArguments\n", "\n", "training_args = TrainingArguments(\n", " \"test-trainer\",\n", " per_device_train_batch_size=16,\n", " per_device_eval_batch_size=16,\n", " num_train_epochs=5,\n", " learning_rate=2e-5,\n", " weight_decay=0.01,\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " <div>\n", " \n", " <progress value='1150' max='1150' style='width:300px; height:20px; vertical-align: middle;'></progress>\n", " [1150/1150 01:57, Epoch 5/5]\n", " </div>\n", " <table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: left;\">\n", " <th>Step</th>\n", " <th>Training Loss</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <td>500</td>\n", " <td>0.443800</td>\n", " </tr>\n", " <tr>\n", " <td>1000</td>\n", " <td>0.127500</td>\n", " </tr>\n", " </tbody>\n", "</table><p>" ], "text/plain": [ "<IPython.core.display.HTML object>" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "TrainOutput(global_step=1150, training_loss=0.25629232904185417, metrics={'train_runtime': 118.2961, 'train_samples_per_second': 155.035, 'train_steps_per_second': 9.721, 'total_flos': 939591087838320.0, 'train_loss': 0.25629232904185417, 'epoch': 5.0})" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from transformers import Trainer\n", "\n", "trainer = Trainer(\n", " model,\n", " training_args,\n", " train_dataset=tokenized_datasets[\"train\"],\n", " eval_dataset=tokenized_datasets[\"validation\"],\n", " data_collator=data_collator,\n", " tokenizer=tokenizer,\n", ")\n", "trainer.train()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " <div>\n", " \n", " <progress value='26' max='26' style='width:300px; height:20px; vertical-align: middle;'></progress>\n", " [26/26 00:00]\n", " </div>\n", " " ], "text/plain": [ "<IPython.core.display.HTML object>" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "(408, 2) (408,)\n" ] } ], "source": [ "predictions = trainer.predict(tokenized_datasets[\"validation\"])\n", "print(predictions.predictions.shape, predictions.label_ids.shape)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'accuracy': 0.8627450980392157, 'f1': 0.9050847457627118}" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import numpy as np\n", "from datasets import load_metric\n", "\n", "metric = load_metric(\"glue\", \"mrpc\")\n", "preds = np.argmax(predictions.predictions, axis=-1)\n", "metric.compute(predictions=preds, references=predictions.label_ids)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "metric = load_metric(\"glue\", \"mrpc\")\n", "\n", "def compute_metrics(eval_preds):\n", " logits, labels = eval_preds\n", " predictions = np.argmax(logits, axis=-1)\n", " return metric.compute(predictions=predictions, references=labels)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight']\n", "- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", "- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" ] }, { "data": { "text/html": [ "\n", " <div>\n", " \n", " <progress value='1377' max='1377' style='width:300px; height:20px; vertical-align: middle;'></progress>\n", " [1377/1377 01:19, Epoch 3/3]\n", " </div>\n", " <table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: left;\">\n", " <th>Epoch</th>\n", " <th>Training Loss</th>\n", " <th>Validation Loss</th>\n", " <th>Accuracy</th>\n", " <th>F1</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <td>1</td>\n", " <td>No log</td>\n", " <td>0.410291</td>\n", " <td>0.823529</td>\n", " <td>0.877966</td>\n", " </tr>\n", " <tr>\n", " <td>2</td>\n", " <td>0.510500</td>\n", " <td>0.405150</td>\n", " <td>0.845588</td>\n", " <td>0.893401</td>\n", " </tr>\n", " <tr>\n", " <td>3</td>\n", " <td>0.258300</td>\n", " <td>0.713577</td>\n", " <td>0.862745</td>\n", " <td>0.902778</td>\n", " </tr>\n", " </tbody>\n", "</table><p>" ], "text/plain": [ "<IPython.core.display.HTML object>" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "TrainOutput(global_step=1377, training_loss=0.306962023633238, metrics={'train_runtime': 79.4823, 'train_samples_per_second': 138.446, 'train_steps_per_second': 17.325, 'total_flos': 530544172170240.0, 'train_loss': 0.306962023633238, 'epoch': 3.0})" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "training_args = TrainingArguments(\"test-trainer\", evaluation_strategy=\"epoch\")\n", "model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)\n", "\n", "trainer = Trainer(\n", " model,\n", " training_args,\n", " train_dataset=tokenized_datasets[\"train\"],\n", " eval_dataset=tokenized_datasets[\"validation\"],\n", " data_collator=data_collator,\n", " tokenizer=tokenizer,\n", " compute_metrics=compute_metrics\n", ")\n", "trainer.train()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "colab": { "name": "The Trainer API", "provenance": [] } }, "nbformat": 4, "nbformat_minor": 4 }