course/videos/trainer_api.ipynb (387 lines of code) (raw):
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This notebook regroups the code sample of the video below, which is a part of the [Hugging Face course](https://huggingface.co/course)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"cellView": "form"
},
"outputs": [
{
"data": {
"text/html": [
"<iframe width=\"560\" height=\"315\" src=\"https://www.youtube.com/embed/nvBXf7s7vTI?rel=0&controls=0&showinfo=0\" frameborder=\"0\" allowfullscreen></iframe>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#@title\n",
"from IPython.display import HTML\n",
"\n",
"HTML('<iframe width=\"560\" height=\"315\" src=\"https://www.youtube.com/embed/nvBXf7s7vTI?rel=0&controls=0&showinfo=0\" frameborder=\"0\" allowfullscreen></iframe>')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Install the Transformers and Datasets libraries to run this notebook."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"! pip install datasets transformers[sentencepiece]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Reusing dataset glue (/home/sgugger/.cache/huggingface/datasets/glue/mrpc/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)\n",
"Loading cached processed dataset at /home/sgugger/.cache/huggingface/datasets/glue/mrpc/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad/cache-8174fd92eed0af98.arrow\n",
"Loading cached processed dataset at /home/sgugger/.cache/huggingface/datasets/glue/mrpc/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad/cache-8c99fb059544bc96.arrow\n",
"Loading cached processed dataset at /home/sgugger/.cache/huggingface/datasets/glue/mrpc/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad/cache-e625eb72bcf1ae1f.arrow\n"
]
}
],
"source": [
"from datasets import load_dataset\n",
"from transformers import AutoTokenizer, DataCollatorWithPadding\n",
"\n",
"raw_datasets = load_dataset(\"glue\", \"mrpc\")\n",
"checkpoint = \"bert-base-cased\"\n",
"tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
"\n",
"def tokenize_function(examples):\n",
" return tokenizer(examples[\"sentence1\"], examples[\"sentence2\"], truncation=True)\n",
"\n",
"tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)\n",
"data_collator = DataCollatorWithPadding(tokenizer)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight']\n",
"- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
"- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
"Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']\n",
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
]
}
],
"source": [
"from transformers import AutoModelForSequenceClassification\n",
"\n",
"model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from transformers import TrainingArguments\n",
"\n",
"training_args = TrainingArguments(\"test-trainer\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from transformers import TrainingArguments\n",
"\n",
"training_args = TrainingArguments(\n",
" \"test-trainer\",\n",
" per_device_train_batch_size=16,\n",
" per_device_eval_batch_size=16,\n",
" num_train_epochs=5,\n",
" learning_rate=2e-5,\n",
" weight_decay=0.01,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
" <div>\n",
" \n",
" <progress value='1150' max='1150' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
" [1150/1150 01:57, Epoch 5/5]\n",
" </div>\n",
" <table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: left;\">\n",
" <th>Step</th>\n",
" <th>Training Loss</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>500</td>\n",
" <td>0.443800</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1000</td>\n",
" <td>0.127500</td>\n",
" </tr>\n",
" </tbody>\n",
"</table><p>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"TrainOutput(global_step=1150, training_loss=0.25629232904185417, metrics={'train_runtime': 118.2961, 'train_samples_per_second': 155.035, 'train_steps_per_second': 9.721, 'total_flos': 939591087838320.0, 'train_loss': 0.25629232904185417, 'epoch': 5.0})"
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from transformers import Trainer\n",
"\n",
"trainer = Trainer(\n",
" model,\n",
" training_args,\n",
" train_dataset=tokenized_datasets[\"train\"],\n",
" eval_dataset=tokenized_datasets[\"validation\"],\n",
" data_collator=data_collator,\n",
" tokenizer=tokenizer,\n",
")\n",
"trainer.train()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
" <div>\n",
" \n",
" <progress value='26' max='26' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
" [26/26 00:00]\n",
" </div>\n",
" "
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"(408, 2) (408,)\n"
]
}
],
"source": [
"predictions = trainer.predict(tokenized_datasets[\"validation\"])\n",
"print(predictions.predictions.shape, predictions.label_ids.shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'accuracy': 0.8627450980392157, 'f1': 0.9050847457627118}"
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import numpy as np\n",
"from datasets import load_metric\n",
"\n",
"metric = load_metric(\"glue\", \"mrpc\")\n",
"preds = np.argmax(predictions.predictions, axis=-1)\n",
"metric.compute(predictions=preds, references=predictions.label_ids)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"metric = load_metric(\"glue\", \"mrpc\")\n",
"\n",
"def compute_metrics(eval_preds):\n",
" logits, labels = eval_preds\n",
" predictions = np.argmax(logits, axis=-1)\n",
" return metric.compute(predictions=predictions, references=labels)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight']\n",
"- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
"- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
"Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']\n",
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
]
},
{
"data": {
"text/html": [
"\n",
" <div>\n",
" \n",
" <progress value='1377' max='1377' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
" [1377/1377 01:19, Epoch 3/3]\n",
" </div>\n",
" <table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: left;\">\n",
" <th>Epoch</th>\n",
" <th>Training Loss</th>\n",
" <th>Validation Loss</th>\n",
" <th>Accuracy</th>\n",
" <th>F1</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>No log</td>\n",
" <td>0.410291</td>\n",
" <td>0.823529</td>\n",
" <td>0.877966</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>0.510500</td>\n",
" <td>0.405150</td>\n",
" <td>0.845588</td>\n",
" <td>0.893401</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>0.258300</td>\n",
" <td>0.713577</td>\n",
" <td>0.862745</td>\n",
" <td>0.902778</td>\n",
" </tr>\n",
" </tbody>\n",
"</table><p>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"TrainOutput(global_step=1377, training_loss=0.306962023633238, metrics={'train_runtime': 79.4823, 'train_samples_per_second': 138.446, 'train_steps_per_second': 17.325, 'total_flos': 530544172170240.0, 'train_loss': 0.306962023633238, 'epoch': 3.0})"
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"training_args = TrainingArguments(\"test-trainer\", evaluation_strategy=\"epoch\")\n",
"model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)\n",
"\n",
"trainer = Trainer(\n",
" model,\n",
" training_args,\n",
" train_dataset=tokenized_datasets[\"train\"],\n",
" eval_dataset=tokenized_datasets[\"validation\"],\n",
" data_collator=data_collator,\n",
" tokenizer=tokenizer,\n",
" compute_metrics=compute_metrics\n",
")\n",
"trainer.train()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"colab": {
"name": "The Trainer API",
"provenance": []
}
},
"nbformat": 4,
"nbformat_minor": 4
}