semantic-audio-search/worker.js (69 lines of code) (raw):

import { AutoTokenizer, ClapTextModelWithProjection, cos_sim, } from "@huggingface/transformers"; import { getCachedFile } from "./utils"; class ApplicationSingleton { static model_id = "Xenova/larger_clap_music_and_speech"; static BASE_URL = "https://huggingface.co/datasets/Xenova/MusicBenchEmbedded/resolve/main/"; static tokenizer = null; static text_model = null; static embeddings = null; static async getInstance(progress_callback = null) { this.tokenizer ??= AutoTokenizer.from_pretrained(this.model_id, { progress_callback, }); this.text_model ??= ClapTextModelWithProjection.from_pretrained( this.model_id, { progress_callback, // TODO allow user to select quantized or not }, ); this.embeddings ??= new Promise((resolve, reject) => { getCachedFile(this.BASE_URL + "audio-embeddings_52768-512_32bit.bin") .then((buffer) => { resolve(new Float32Array(buffer)); }) .catch(reject); }); return Promise.all([this.tokenizer, this.text_model, this.embeddings]); } } function cosineSimilarity(query_embeds, database_embeds) { const EMBED_DIM = 512; const numDB = database_embeds.length / EMBED_DIM; const similarityScores = new Array(numDB); for (let i = 0; i < numDB; ++i) { const startOffset = i * EMBED_DIM; const dbVector = database_embeds.slice( startOffset, startOffset + EMBED_DIM, ); similarityScores[i] = cos_sim(query_embeds, dbVector); } return similarityScores; } // Listen for messages from the main thread self.addEventListener("message", async (event) => { // Get the tokenizer, model, and embeddings. When called for the first time, // this will load the files and cache them for future use. const [tokenizer, text_model, embeddings] = await ApplicationSingleton.getInstance(self.postMessage); // Send the output back to the main thread self.postMessage({ status: "ready" }); // Run tokenization const text_inputs = tokenizer(event.data.query, { padding: true, truncation: true, }); // Compute embeddings const { text_embeds } = await text_model(text_inputs); // Compute similarity scores const scores = cosineSimilarity(text_embeds.data, embeddings); const output = scores .map((score, i) => [score, i]) // Save index .sort((a, b) => b[0] - a[0]) // Sort by scores .slice(0, 100); // Get top 100 results // Send the output back to the main thread self.postMessage({ status: "complete", output: output, }); });