course/videos/bleu_metric.ipynb (134 lines of code) (raw):

{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "This notebook regroups the code sample of the video below, which is a part of the [Hugging Face course](https://huggingface.co/course)." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "cellView": "form" }, "outputs": [ { "data": { "text/html": [ "<iframe width=\"560\" height=\"315\" src=\"https://www.youtube.com/embed/M05L1DhFqcw?rel=0&amp;controls=0&amp;showinfo=0\" frameborder=\"0\" allowfullscreen></iframe>" ], "text/plain": [ "<IPython.core.display.HTML object>" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#@title\n", "from IPython.display import HTML\n", "\n", "HTML('<iframe width=\"560\" height=\"315\" src=\"https://www.youtube.com/embed/M05L1DhFqcw?rel=0&amp;controls=0&amp;showinfo=0\" frameborder=\"0\" allowfullscreen></iframe>')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Install the Transformers and Datasets libraries to run this notebook." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "! pip install datasets transformers[sentencepiece]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from datasets import load_metric\n", "\n", "bleu = load_metric(\"bleu\")\n", "predictions = [[\"I\", \"have\", \"thirty\", \"six\", \"years\"]]\n", "references = [\n", " [[\"I\", \"am\", \"thirty\", \"six\", \"years\", \"old\"], [\"I\", \"am\", \"thirty\", \"six\"]]\n", "]\n", "bleu.compute(predictions=predictions, references=references)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "predictions = [[\"I\", \"have\", \"thirty\", \"six\", \"years\"]]\n", "references = [\n", " [[\"I\", \"am\", \"thirty\", \"six\", \"years\", \"old\"], [\"I\", \"am\", \"thirty\", \"six\"]]\n", "]\n", "bleu.compute(predictions=predictions, references=references)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "predictions = [[\"I\", \"have\", \"thirty\", \"six\", \"years\"]]\n", "references = [\n", " [[\"I\", \"am\", \"thirty\", \"six\", \"years\", \"old\"], [\"I\", \"am\", \"thirty\", \"six\"]]\n", "]\n", "bleu.compute(predictions=predictions, references=references)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "! pip install sacrebleu" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "sacrebleu = load_metric(\"sacrebleu\")\n", "# SacreBLEU operates on raw text, not tokens\n", "predictions = [\"I have thirty six years\"]\n", "references = [[\"I am thirty six years old\", \"I am thirty six\"]]\n", "sacrebleu.compute(predictions=predictions, references=references)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "colab": { "name": "What is the BLEU metric?", "provenance": [] } }, "nbformat": 4, "nbformat_minor": 4 }