course/videos/push_to_hub_new.ipynb (552 lines of code) (raw):

{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "You will need an authentication token with your Hugging Face credentials to use the `push_to_hub` method. Execute `huggingface-cli login` in your terminal or by uncommenting the following cell:" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# !huggingface-cli login" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "\n", "from datasets import load_dataset, load_metric\n", "from transformers import (\n", " AutoModelForSequenceClassification,\n", " AutoTokenizer,\n", " DataCollatorWithPadding,\n", " Trainer,\n", " TrainingArguments,\n", ")" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "checkpoint = \"bert-base-cased\"" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Reusing dataset glue (/home/sgugger/.cache/huggingface/datasets/glue/mrpc/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)\n", "Loading cached processed dataset at /home/sgugger/.cache/huggingface/datasets/glue/mrpc/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad/cache-8174fd92eed0af98.arrow\n", "Loading cached processed dataset at /home/sgugger/.cache/huggingface/datasets/glue/mrpc/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad/cache-8c99fb059544bc96.arrow\n", "Loading cached processed dataset at /home/sgugger/.cache/huggingface/datasets/glue/mrpc/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad/cache-e625eb72bcf1ae1f.arrow\n", "Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']\n", "- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", "- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.weight', 'classifier.bias']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" ] } ], "source": [ "raw_datasets = load_dataset(\"glue\", \"mrpc\")\n", "\n", "tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n", "\n", "def tokenize_function(examples):\n", " return tokenizer(examples[\"sentence1\"], examples[\"sentence2\"], truncation=True)\n", "\n", "tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)\n", "\n", "model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)\n", "\n", "training_args = TrainingArguments(\n", " \"finetuned-bert-mrpc\",\n", " per_device_train_batch_size=16,\n", " per_device_eval_batch_size=16,\n", " learning_rate=2e-5,\n", " weight_decay=0.01,\n", " evaluation_strategy=\"epoch\",\n", " logging_strategy=\"epoch\",\n", " log_level=\"error\",\n", " push_to_hub=True,\n", " push_to_hub_model_id=\"finetuned-bert-mrpc\",\n", " # push_to_hub_organization=\"huggingface\",\n", " # push_to_hub_token=\"my_token\",\n", ")\n", "\n", "data_collator = DataCollatorWithPadding(tokenizer)\n", "\n", "metric = load_metric(\"glue\", \"mrpc\")\n", "\n", "def compute_metrics(eval_preds):\n", " logits, labels = eval_preds\n", " predictions = np.argmax(logits, axis=-1)\n", " return metric.compute(predictions=predictions, references=labels)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "trainer = Trainer(\n", " model,\n", " training_args,\n", " train_dataset=tokenized_datasets[\"train\"],\n", " eval_dataset=tokenized_datasets[\"validation\"],\n", " data_collator=data_collator,\n", " tokenizer=tokenizer,\n", " compute_metrics=compute_metrics,\n", ")" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " <div>\n", " \n", " <progress value='690' max='690' style='width:300px; height:20px; vertical-align: middle;'></progress>\n", " [690/690 01:13, Epoch 3/3]\n", " </div>\n", " <table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: left;\">\n", " <th>Epoch</th>\n", " <th>Training Loss</th>\n", " <th>Validation Loss</th>\n", " <th>Accuracy</th>\n", " <th>F1</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <td>1</td>\n", " <td>0.573500</td>\n", " <td>0.475627</td>\n", " <td>0.774510</td>\n", " <td>0.835714</td>\n", " </tr>\n", " <tr>\n", " <td>2</td>\n", " <td>0.383800</td>\n", " <td>0.452076</td>\n", " <td>0.821078</td>\n", " <td>0.878939</td>\n", " </tr>\n", " <tr>\n", " <td>3</td>\n", " <td>0.233600</td>\n", " <td>0.485343</td>\n", " <td>0.850490</td>\n", " <td>0.897133</td>\n", " </tr>\n", " </tbody>\n", "</table><p>" ], "text/plain": [ "<IPython.core.display.HTML object>" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "TrainOutput(global_step=690, training_loss=0.39697496718254643, metrics={'train_runtime': 74.2937, 'train_samples_per_second': 148.115, 'train_steps_per_second': 9.287, 'total_flos': 563360051116800.0, 'train_loss': 0.39697496718254643, 'epoch': 3.0})" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "trainer.train()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Push to hub from the Trainer directly" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "You will need an authentication token with your Hugging Face credentials to use the `push_to_hub` method. Execute `huggingface-cli login` in your terminal or by uncommenting the following cell:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# !huggingface-cli login" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The `Trainer` has a new method to directly upload the model, tokenizer and model configuration in a repo on the [Hub](https://huggingface.co/). It will even auto-generate a model card draft using the hyperparameters and evaluation results!" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'https://huggingface.co/sgugger/finetuned-bert/commit/12c9aaaadf60e2c48e9419524f3170f445a120d6'" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "trainer.push_to_hub()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "If you are using your own training loop, you can push the model and tokenizer separately (and you will have to write the model card yourself):" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# model.push_to_hub(\"finetuned-bert-mrpc\")\n", "# tokenizer.push_to_hub(\"finetuned-bert-mrpc\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## You can load your model from anywhere using from_pretrained!" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "533ace86937d46e2bf6e98452fa786aa", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=671.0, style=ProgressStyle(description_…" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "d5600c7616cc44a0a3cf767fe0612bb0", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=433336585.0, style=ProgressStyle(descri…" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n" ] } ], "source": [ "from transformers import AutoModelForSequenceClassification\n", "\n", "model_name = \"sgugger/finetuned-bert-mrpc\"\n", "model = AutoModelForSequenceClassification.from_pretrained(model_name)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## You can use your model in a pipeline!" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "222535481b2d4ded93f597311e7ec283", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=213450.0, style=ProgressStyle(descripti…" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "bccaf4cf945848e3863b984ed8a18a71", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=435816.0, style=ProgressStyle(descripti…" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "f7aa6d01720542458c454f7f95d2b881", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=112.0, style=ProgressStyle(description_…" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "00bbc6ae04a14273a93b8613e75d17e6", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=284.0, style=ProgressStyle(description_…" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n" ] } ], "source": [ "from transformers import pipeline\n", "\n", "classifier = pipeline(\"text-classification\", model=model_name)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[{'label': 'LABEL_0', 'score': 0.7789641618728638}]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "classifier(\"My name is Sylvain. [SEP] My name is Lysandre\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Updating a problematic file is super easy!" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "model.config.label2id = {\"not equivalent\": 0, \"equivalent\": 1}" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "model.config.id2label = {0: \"not equivalent\", 1: \"equivalent\"}" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "model.config.push_to_hub(\"finetuned-bert-mrpc\")" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "scrolled": true }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "09d7cee3537f4385b11dd3d647abca15", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=814.0, style=ProgressStyle(description_…" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n" ] }, { "data": { "text/plain": [ "[{'label': 'not equivalent', 'score': 0.7789641618728638}]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "classifier = pipeline(\"text-classification\", model=model_name)\n", "\n", "classifier(\"My name is Sylvain. [SEP] My name is Lysandre\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.9" } }, "nbformat": 4, "nbformat_minor": 4 }