candle-wasm-examples/bert/lib-example.html (348 lines of code) (raw):

<!DOCTYPE html>
<html>
  <head>
    <!--
      The doctype must be the first thing in the document; anything before it
      makes browsers ignore it and fall back to quirks mode. The title lives in
      this single head, and the charset is declared exactly once.
    -->
    <meta charset="UTF-8" />
    <title>Candle Bert</title>
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <style>
      @import url("https://fonts.googleapis.com/css2?family=Source+Code+Pro:wght@200;300;400&family=Source+Sans+3:wght@100;200;300;400;500;600;700;800;900&display=swap");
      html,
      body {
        font-family: "Source Sans 3", sans-serif;
      }
    </style>
    <script src="https://cdn.tailwindcss.com"></script>
    <script type="module" src="./code.js"></script>
    <script type="module">
      import { hcl } from "https://cdn.skypack.dev/d3-color@3";
      import { interpolateReds } from "https://cdn.skypack.dev/d3-scale-chromatic@3";
      import { scaleLinear } from "https://cdn.skypack.dev/d3-scale@4";
      import {
        getModelInfo,
        getEmbeddings,
        getWikiText,
        cosineSimilarity,
      } from "./utils.js";

      // Worker that is handed to getEmbeddings() for every embedding request.
      const bertWorker = new Worker("./bertWorker.js", {
        type: "module",
      });

      // DOM handles used throughout the script.
      const inputContainerEl = document.querySelector("#input-container");
      const textAreaEl = document.querySelector("#input-area");
      const outputAreaEl = document.querySelector("#output-area");
      const formEl = document.querySelector("#form");
      const searchInputEl = document.querySelector("#search-input");
      const formWikiEl = document.querySelector("#form-wiki");
      const searchWikiEl = document.querySelector("#search-wiki");
      const outputStatusEl = document.querySelector("#output-status");
      const modelSelectEl = document.querySelector("#model");

      // Sentence boundary: whitespace that follows "." or "?", unless the
      // period belongs to an abbreviation-like token ("e.g.", "Mr.", "A.").
      const sentencesRegex =
        /(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<![A-Z]\.)(?<=\.|\?)\s/gm;

      // One embedding per sentence of the current input text, in sentence order.
      let sentenceEmbeddings = [];
      // Text that is currently rendered in the output area.
      let currInputText = "";
      // True while an embedding request is in flight; handlers bail out on it.
      let isCalculating = false;

      /**
       * Shows (and focuses) or hides the editable textarea.
       * @param {boolean} state - true to reveal and focus it, false to hide it.
       */
      function toggleTextArea(state) {
        textAreaEl.hidden = !state;
        if (state) {
          textAreaEl.focus();
        }
      }

      // The container is focusable (tabindex); focusing it swaps in the editor.
      inputContainerEl.addEventListener("focus", () => {
        toggleTextArea(true);
      });
      textAreaEl.addEventListener("blur", () => {
        toggleTextArea(false);
      });
// When the editor loses focus with changed text, re-render and re-embed it.
      textAreaEl.addEventListener("focusout", () => {
        toggleTextArea(false);
        if (currInputText === textAreaEl.value || isCalculating) return;
        populateOutputArea(textAreaEl.value);
        calculateEmbeddings(textAreaEl.value);
      });

      // Switching models invalidates the existing embeddings, so recompute them.
      modelSelectEl.addEventListener("change", () => {
        if (currInputText === "" || isCalculating) return;
        populateOutputArea(textAreaEl.value);
        calculateEmbeddings(textAreaEl.value);
      });

      /**
       * Renders `text` into the output area as one <span id="sentence-N"> per
       * sentence and records it as the current input text.
       * @param {string} text - Full input text to split and display.
       */
      function populateOutputArea(text) {
        currInputText = text;
        const sentences = text.split(sentencesRegex);
        outputAreaEl.replaceChildren();
        for (const [id, sentence] of sentences.entries()) {
          const sentenceEl = document.createElement("span");
          sentenceEl.id = `sentence-${id}`;
          sentenceEl.innerText = sentence + " ";
          outputAreaEl.appendChild(sentenceEl);
        }
      }

      // Search: embed the query, rank sentences by cosine similarity and
      // highlight the ten best matches.
      formEl.addEventListener("submit", async (e) => {
        e.preventDefault();
        // Without sentence embeddings there is nothing to rank; bail out
        // instead of failing on an empty result list further down.
        if (
          isCalculating ||
          currInputText === "" ||
          sentenceEmbeddings.length === 0
        ) {
          return;
        }
        isCalculating = true;
        toggleInputs(true);
        try {
          const modelID = modelSelectEl.value;
          const { modelURL, tokenizerURL, configURL, search_prefix } =
            getModelInfo(modelID);
          const query = search_prefix + searchInputEl.value;
          outputStatusEl.classList.remove("invisible");
          outputStatusEl.innerText = "Calculating embeddings for query...";
          const out = await getEmbeddings(
            bertWorker,
            modelURL,
            tokenizerURL,
            configURL,
            modelID,
            [query]
          );
          outputStatusEl.classList.add("invisible");
          const queryEmbeddings = out.output[0];
          // calculate cosine similarity with all sentences given the query
          const distances = sentenceEmbeddings
            .map((embedding, id) => ({
              id,
              similarity: cosineSimilarity(queryEmbeddings, embedding),
            }))
            .sort((a, b) => b.similarity - a.similarity)
            // getting top 10 most similar sentences
            .slice(0, 10);
          // Map [weakest shown match, best match] onto the red color ramp.
          const colorScale = scaleLinear()
            .domain([distances.at(-1).similarity, distances[0].similarity])
            .range([0, 1])
            .interpolate(() => interpolateReds);
          // Clear highlights left over from a previous search.
          outputAreaEl.querySelectorAll("span").forEach((el) => {
            el.style.color = "unset";
            el.style.backgroundColor = "unset";
          });
          distances.forEach((d) => {
            const el = outputAreaEl.querySelector(`#sentence-${d.id}`);
            const color = colorScale(d.similarity);
            // Pick a readable text color based on the background's lightness.
            const fontColor = hcl(color).l < 70 ? "white" : "black";
            el.style.color = fontColor;
            el.style.backgroundColor = color;
          });
          outputAreaEl
            .querySelector(`#sentence-${distances[0].id}`)
            .scrollIntoView({
              behavior: "smooth",
              block: "center",
              inline: "nearest",
            });
        } catch (err) {
          console.error("Similarity search failed", err);
          outputStatusEl.innerText = "Similarity search failed";
          outputStatusEl.classList.remove("invisible");
        } finally {
          // Always release the UI, even when the request or rendering failed.
          isCalculating = false;
          toggleInputs(false);
        }
      });

      /**
       * Embeds every sentence of `text` with the selected model and stores the
       * vectors in `sentenceEmbeddings`. Inputs are disabled while it runs.
       * Failures are logged and shown in the status line, so the returned
       * promise never rejects and callers may fire and forget.
       * @param {string} text - Input text; split with the same sentence regex
       *   that is used to render the output area.
       * @returns {Promise<void>}
       */
      async function calculateEmbeddings(text) {
        isCalculating = true;
        toggleInputs(true);
        // Drop vectors that belong to the previous text/model so a failed run
        // cannot leave stale embeddings paired with the new sentences.
        sentenceEmbeddings = [];
        outputStatusEl.classList.remove("invisible");
        try {
          const modelID = modelSelectEl.value;
          const { modelURL, tokenizerURL, configURL, document_prefix } =
            getModelInfo(modelID);
          const sentences = text.split(sentencesRegex);
          const allEmbeddings = [];
          for (const [id, sentence] of sentences.entries()) {
            const query = document_prefix + sentence;
            outputStatusEl.innerText = `Calculating embeddings: sentence ${
              id + 1
            } of ${sentences.length}`;
            const embeddings = await getEmbeddings(
              bertWorker,
              modelURL,
              tokenizerURL,
              configURL,
              modelID,
              [query],
              updateStatus
            );
            allEmbeddings.push(embeddings);
          }
          sentenceEmbeddings = allEmbeddings.map((e) => e.output[0]);
          outputStatusEl.classList.add("invisible");
        } catch (err) {
          console.error("Failed to calculate sentence embeddings", err);
          outputStatusEl.innerText = "Failed to calculate embeddings";
        } finally {
          isCalculating = false;
          toggleInputs(false);
        }
      }

      /**
       * Progress callback handed to getEmbeddings(); surfaces the message of
       * "loading" status updates in the status line.
       * @param {object} data - Update object; only `status` and `message` are read.
       */
      function updateStatus(data) {
        if (!("status" in data) || data.status !== "loading") return;
        outputStatusEl.innerText = data.message;
        outputStatusEl.classList.remove("invisible");
      }

      /**
       * Disables or re-enables every control marked with the "interactive" class.
       * @param {boolean} state - true to disable the controls, false to enable.
       */
      function toggleInputs(state) {
        document.querySelectorAll(".interactive").forEach((el) => {
          el.disabled = Boolean(state);
        });
      }

      // Typing clears a previously reported "invalid article" validity error.
      searchWikiEl.addEventListener("input", () => {
        searchWikiEl.setCustomValidity("");
      });

      // Load a Wikipedia article (typed title or example button) as input text.
      formWikiEl.addEventListener("submit", async (e) => {
        e.preventDefault();
        // submitter is null when the form is submitted without a button.
        if (e.submitter?.hasAttribute("data-example")) {
          searchWikiEl.value = e.submitter.innerText;
        }
        const text = searchWikiEl.value;
        if (isCalculating || text === "") return;
        let wikiText;
        try {
          wikiText = await getWikiText(text);
        } catch (err) {
          // Only a failed lookup is reported as a bad article name.
          console.warn("Could not load Wikipedia article", err);
          searchWikiEl.setCustomValidity("Invalid Wikipedia article name");
          searchWikiEl.reportValidity();
          return;
        }
        searchWikiEl.setCustomValidity("");
        // Assign .value, not .innerHTML: markup assignment decodes character
        // references and has no effect once the user has edited the textarea.
        textAreaEl.value = wikiText;
        populateOutputArea(wikiText);
        searchWikiEl.value = "";
        calculateEmbeddings(wikiText);
      });
    </script>
  </head>
  <body class="container max-w-4xl mx-auto p-4">
    <main class="grid grid-cols-1 gap-5 relative">
      <span class="absolute text-5xl -ml-[1em]"> 🕯️ </span>
      <div>
        <h1 class="text-5xl font-bold">Candle BERT</h1>
        <h2 class="text-2xl font-bold">Rust/WASM Demo</h2>
        <p class="max-w-lg">
          Running sentence embeddings and similarity search in the browser using
          the Bert Model written with
          <a
            href="https://github.com/huggingface/candle/"
            target="_blank"
            class="underline hover:text-blue-500 hover:no-underline"
            >Candle
          </a>
          and compiled to Wasm. Embedding models are from
          <a
            href="https://huggingface.co/sentence-transformers/"
            target="_blank"
            class="underline hover:text-blue-500 hover:no-underline"
          >
            Sentence Transformers
          </a>
          and
          <a
            href="https://huggingface.co/intfloat/"
            target="_blank"
            class="underline hover:text-blue-500 hover:no-underline"
          >
            Liang Wang - e5 Models
          </a>
        </p>
      </div>

      <div>
        <label for="model" class="font-medium block">Model Options: </label>
        <select
          id="model"
          class="border-2 border-gray-500 rounded-md font-light interactive disabled:cursor-not-allowed w-full max-w-max"
        >
          <option value="intfloat_e5_small_v2" selected>
            intfloat/e5-small-v2 (133 MB)
          </option>
          <option value="intfloat_e5_base_v2">
            intfloat/e5-base-v2 (438 MB)
          </option>
          <option value="intfloat_multilingual_e5_small">
            intfloat/multilingual-e5-small (471 MB)
          </option>
          <option value="sentence_transformers_all_MiniLM_L6_v2">
            sentence-transformers/all-MiniLM-L6-v2 (90.9 MB)
          </option>
          <option value="sentence_transformers_all_MiniLM_L12_v2">
            sentence-transformers/all-MiniLM-L12-v2 (133 MB)
          </option>
        </select>
      </div>
      <div>
        <h3 class="font-medium">Examples:</h3>
        <form
          id="form-wiki"
          class="flex text-xs rounded-md justify-between w-min gap-3"
        >
          <input type="submit" hidden />

          <button data-example class="disabled:cursor-not-allowed interactive">
            Pizza
          </button>
          <button data-example class="disabled:cursor-not-allowed interactive">
            Paris
          </button>
          <button data-example class="disabled:cursor-not-allowed interactive">
            Physics
          </button>
          <input
            type="text"
            id="search-wiki"
            title="Search Wikipedia article by title"
            class="font-light py-0 mx-1 resize-none outline-none w-32 disabled:cursor-not-allowed interactive"
            placeholder="Load Wikipedia article..."
          />
          <button
            title="Search Wikipedia article and load into input"
            class="bg-gray-700 hover:bg-gray-800 text-white font-normal px-2 py-1 rounded disabled:bg-gray-300 disabled:cursor-not-allowed interactive"
          >
            Load
          </button>
        </form>
      </div>
      <form
        id="form"
        class="flex text-normal px-1 py-1 border border-gray-700 rounded-md items-center"
      >
        <input type="submit" hidden />
        <input
          type="text"
          id="search-input"
          class="font-light w-full px-3 py-2 mx-1 resize-none outline-none interactive disabled:cursor-not-allowed"
          placeholder="Search query here..."
        />
        <button
          class="bg-gray-700 hover:bg-gray-800 text-white font-normal py-2 w-16 rounded disabled:bg-gray-300 disabled:cursor-not-allowed interactive"
        >
          Search
        </button>
      </form>
      <div>
        <h3 class="font-medium">Input text:</h3>
        <div class="flex justify-between items-center">
          <div class="rounded-md inline text-xs">
            <span id="output-status" class="m-auto font-light invisible"
              >C</span
            >
          </div>
        </div>
        <div
          id="input-container"
          tabindex="0"
          class="min-h-[250px] bg-slate-100 text-gray-500 rounded-md p-4 flex flex-col gap-2 relative"
        >
          <textarea
            id="input-area"
            hidden
            placeholder="Input text to perform semantic similarity search..."
            class="flex-1 resize-none outline-none left-0 right-0 top-0 bottom-0 m-4 absolute interactive disabled:invisible"
          ></textarea>
          <p id="output-area" class="grid-rows-2">
            Input text to perform semantic similarity search...
          </p>
        </div>
      </div>
    </main>
  </body>
</html>