semantic-audio-search/worker.js (69 lines of code) (raw):
import {
AutoTokenizer,
ClapTextModelWithProjection,
cos_sim,
} from "@huggingface/transformers";
import { getCachedFile } from "./utils";
/**
 * Lazily loads and caches everything the worker needs to answer a query:
 * the tokenizer and text model for `model_id`, plus the audio embedding table
 * downloaded from `BASE_URL`.
 *
 * Each resource is cached as a promise on a static field, so repeated and
 * overlapping calls to `getInstance` share one in-flight load.
 */
class ApplicationSingleton {
  static model_id = "Xenova/larger_clap_music_and_speech";
  static BASE_URL =
    "https://huggingface.co/datasets/Xenova/MusicBenchEmbedded/resolve/main/";

  // Cached load promises; `null` means "not started yet (or last attempt failed)".
  static tokenizer = null;
  static text_model = null;
  static embeddings = null;

  /**
   * Starts (or reuses) the three resource loads and waits for all of them.
   *
   * @param {Function|null} [progress_callback=null] Forwarded to both
   *   `from_pretrained` calls. Only used by the call that actually starts a
   *   load; later calls reuse the cached promises.
   * @returns {Promise<Array>} Resolves to `[tokenizer, text_model, embeddings]`,
   *   where `embeddings` is a `Float32Array` over the downloaded buffer.
   *   Rejects with the first load error.
   */
  static async getInstance(progress_callback = null) {
    // Cache the promise returned by `startLoading` on `this[field]`. If the
    // load rejects, clear the cache entry so the next call retries instead of
    // replaying the same failure forever.
    const loadOnce = (field, startLoading) => {
      this[field] ??= startLoading().catch((error) => {
        this[field] = null;
        throw error;
      });
      return this[field];
    };

    // The loaders are `async` so that even a synchronous throw surfaces as a
    // rejection and goes through the reset path above.
    const tokenizer = loadOnce("tokenizer", async () =>
      AutoTokenizer.from_pretrained(this.model_id, { progress_callback }),
    );

    const text_model = loadOnce("text_model", async () =>
      ClapTextModelWithProjection.from_pretrained(this.model_id, {
        progress_callback,
        // TODO allow user to select quantized or not
      }),
    );

    const embeddings = loadOnce("embeddings", async () => {
      const buffer = await getCachedFile(
        `${this.BASE_URL}audio-embeddings_52768-512_32bit.bin`,
      );
      return new Float32Array(buffer);
    });

    return Promise.all([tokenizer, text_model, embeddings]);
  }
}
/**
 * Scores one query embedding against every vector in a flat embedding table.
 *
 * `database_embeds` is read as consecutive rows of `embed_dim` values; row `i`
 * occupies indices `[i * embed_dim, (i + 1) * embed_dim)`.
 *
 * @param {ArrayLike<number>} query_embeds The query vector, passed unchanged
 *   as the first argument of `cos_sim` for every row.
 * @param {Float32Array|number[]} database_embeds Flat, row-major table of
 *   database vectors.
 * @param {number} [embed_dim=512] Number of values per database vector.
 * @returns {number[]} One `cos_sim` score per database row, in row order.
 * @throws {RangeError} If `embed_dim` is not a positive integer, or if the
 *   table length is not a whole number of rows.
 */
function cosineSimilarity(query_embeds, database_embeds, embed_dim = 512) {
  if (!Number.isInteger(embed_dim) || embed_dim <= 0) {
    throw new RangeError(
      `embed_dim must be a positive integer, got ${embed_dim}`,
    );
  }

  const totalValues = database_embeds.length;
  if (totalValues % embed_dim !== 0) {
    // A ragged table means the data is truncated or has a different
    // dimension; fail with a message that says so.
    throw new RangeError(
      `database_embeds length (${totalValues}) is not a multiple of embed_dim (${embed_dim})`,
    );
  }

  // Typed arrays can hand out zero-copy views with `subarray`; `slice` would
  // allocate and copy a fresh vector per row. Plain arrays have no
  // `subarray`, so fall back to `slice` for them.
  const rowAt =
    typeof database_embeds.subarray === "function"
      ? (start, end) => database_embeds.subarray(start, end)
      : (start, end) => database_embeds.slice(start, end);

  const rowCount = totalValues / embed_dim;
  const similarityScores = new Array(rowCount);
  for (let row = 0; row < rowCount; ++row) {
    const start = row * embed_dim;
    similarityScores[row] = cos_sim(query_embeds, rowAt(start, start + embed_dim));
  }
  return similarityScores;
}
/**
 * Worker entry point: answers each message from the main thread with the
 * best-matching embedding rows for `event.data.query`.
 *
 * Messages posted back, in order:
 *   1. whatever `ApplicationSingleton.getInstance` reports through the
 *      `self.postMessage` progress callback,
 *   2. `{ status: "ready" }` once the resources are available,
 *   3. `{ status: "complete", output }` where `output` holds up to 100
 *      `[score, index]` pairs sorted by descending score.
 */
self.addEventListener("message", async (event) => {
  // Resolve the tokenizer, text model and embedding table. The first call
  // loads them; later calls reuse what was cached.
  const resources = await ApplicationSingleton.getInstance(self.postMessage);
  const [tokenizer, text_model, embeddings] = resources;

  // Tell the main thread that loading is done and the search is starting.
  self.postMessage({ status: "ready" });

  // Tokenize the query text.
  const tokenizerOptions = { padding: true, truncation: true };
  const text_inputs = tokenizer(event.data.query, tokenizerOptions);

  // Embed the query with the text model.
  const modelOutput = await text_model(text_inputs);
  const { text_embeds } = modelOutput;

  // Score the query against every stored embedding.
  const scores = cosineSimilarity(text_embeds.data, embeddings);

  // Pair each score with its row index so the index survives sorting.
  const ranked = Array.from(scores, (score, index) => [score, index]);
  ranked.sort((left, right) => right[0] - left[0]); // highest score first

  // Keep only the best matches.
  const maxResults = 100;
  const output = ranked.slice(0, maxResults);

  // Deliver the results to the main thread.
  self.postMessage({ status: "complete", output });
});