course/videos/custom_loss.ipynb

{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "This notebook regroups the code sample of the video below, which is a part of the [Hugging Face course](https://huggingface.co/course)." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "cellView": "form" }, "outputs": [ { "data": { "text/html": [ "<iframe width=\"560\" height=\"315\" src=\"https://www.youtube.com/embed/Hm8_PgVTFuc?rel=0&amp;controls=0&amp;showinfo=0\" frameborder=\"0\" allowfullscreen></iframe>" ], "text/plain": [ "<IPython.core.display.HTML object>" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#@title\n", "from IPython.display import HTML\n", "\n", "HTML('<iframe width=\"560\" height=\"315\" src=\"https://www.youtube.com/embed/Hm8_PgVTFuc?rel=0&amp;controls=0&amp;showinfo=0\" frameborder=\"0\" allowfullscreen></iframe>')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Install the Transformers and Datasets libraries to run this notebook." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "! pip install datasets transformers[sentencepiece]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from transformers import AutoTokenizer, AutoModelForCausalLM\n", "from accelerate import Accelerator\n", "\n", "accelerator = Accelerator()\n", "tokenizer = AutoTokenizer.from_pretrained(\"huggingface-course/code-search-net-tokenizer\")\n", "model = AutoModelForCausalLM.from_pretrained(\"huggingface-course/codeparrot-ds\")\n", "\n", "keytoken_ids = []\n", "for keyword in [\n", " \"plt\",\n", " \"pd\",\n", " \"sk\",\n", " \"fit\",\n", " \"predict\",\n", " \" plt\",\n", " \" pd\",\n", " \" sk\",\n", " \" fit\",\n", " \" predict\",\n", "]:\n", " ids = tokenizer([keyword]).input_ids[0]\n", " keytoken_ids.append(ids[0])\n", "\n", "batch = tokenizer([\"import numpy as np\"], return_tensors=\"pt\")\n", "model = accelerator.prepare(model)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from torch.nn import CrossEntropyLoss\n", "import torch\n", "\n", "\n", "def keytoken_weighted_loss(inputs, logits, keytoken_ids, alpha=1.0):\n", " # Shift so that tokens < n predict n\n", " shift_labels = inputs[..., 1:].contiguous()\n", " shift_logits = logits[..., :-1, :].contiguous()\n", " # Calculate per-token loss\n", " loss_fct = CrossEntropyLoss(reduce=False)\n", " loss = loss_fct(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1))\n", " # Resize and average loss per sample\n", " loss_per_sample = loss.view(shift_logits.size(0), shift_logits.size(1)).mean(axis=1)\n", " # Calculate and scale weighting\n", " weights = torch.stack([(inputs == kt).float() for kt in keytoken_ids]).sum(\n", " axis=[0, 2]\n", " )\n", " weights = alpha * (1.0 + weights)\n", " # Calculate weighted average\n", " weighted_loss = (loss_per_sample * weights).mean()\n", " return weighted_loss" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "logits = model(batch[\"input_ids\"]).logits\n", "loss = keytoken_weighted_loss(batch[\"input_ids\"], logits, keytoken_ids)\n", "accelerator.backward(loss)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from transformers import Trainer\n", "\n", "class MyTrainer(Trainer):\n", " def compute_loss(self, model, inputs, return_outputs=False):\n", " input_ids = inputs.get(\"input_ids\")\n", " outputs = 
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import Trainer\n",
    "\n",
    "\n",
    "class MyTrainer(Trainer):\n",
    "    def compute_loss(self, model, inputs, return_outputs=False):\n",
    "        input_ids = inputs.get(\"input_ids\")\n",
    "        outputs = model(input_ids)\n",
    "        loss = keytoken_weighted_loss(input_ids, outputs.logits, keytoken_ids)\n",
    "\n",
    "        return (loss, outputs) if return_outputs else loss"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "colab": {
   "name": "Using a custom loss function",
   "provenance": []
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}