in webgpu-nomic-embed/main.js [66:152]
/**
 * Animation-frame callback that drives the live classification loop.
 *
 * Every call re-schedules itself through `window.requestAnimationFrame`.
 * The actual work runs in a detached async task guarded by the module-level
 * `isProcessing` flag, so at most one pass is in flight at a time; frames
 * that arrive while a pass is running are skipped.
 *
 * One pass:
 *   1. If `labelsInput.value` or `templateInput.value` changed since the last
 *      pass, re-tokenize the labels and recompute `textEmbeddings`
 *      (or clear the overlay when no labels remain).
 *   2. If text embeddings exist, draw the current video frame onto the canvas
 *      context, embed it, score it against every label and render the sorted
 *      scores into `overlay`.
 *   3. Update the FPS read-out in `status`.
 *
 * Errors raised during a pass are logged and the `isProcessing` flag is
 * always released, so a single failing frame cannot stall the loop.
 *
 * @returns {void}
 */
function onFrameUpdate() {
  if (!isProcessing) {
    isProcessing = true;
    (async function () {
      try {
        // If text inputs have changed, update the embeddings.
        if (
          prevTextInputs !== labelsInput.value ||
          prevTemplate !== templateInput.value
        ) {
          textEmbeddings = null;
          prevTextInputs = labelsInput.value;
          prevTemplate = templateInput.value;
          // Comma-separated labels; drop empty entries.
          labels = prevTextInputs.split(/\s*,\s*/).filter((x) => x);

          if (labels.length > 0) {
            // A function replacer inserts the label verbatim; a plain string
            // replacement would interpret `$&`, `$$`, ... inside the label.
            const texts = labels.map((label) =>
              prevTemplate.replaceAll("{}", () => label),
            );
            const text_inputs = tokenizer(texts, {
              padding: true,
              truncation: true,
            });

            // Compute embeddings: pool -> layer norm -> L2 normalize.
            const { last_hidden_state } = await text_model(text_inputs);
            const pooled = mean_pooling(
              last_hidden_state,
              text_inputs.attention_mask,
            );
            const normed = layer_norm(pooled, [pooled.dims[1]]);
            textEmbeddings = normed.normalize(2, -1).tolist();
          } else {
            overlay.innerHTML = "";
          }
        }

        if (textEmbeddings) {
          // Read the current frame from the video.
          context.drawImage(video, 0, 0, IMAGE_SIZE, IMAGE_SIZE);
          const pixelData = context.getImageData(
            0,
            0,
            IMAGE_SIZE,
            IMAGE_SIZE,
          ).data;
          // 4 channels: canvas pixel data is RGBA.
          const image = new RawImage(pixelData, IMAGE_SIZE, IMAGE_SIZE, 4);
          const image_inputs = await processor(image);

          // Compute the image embedding (mean over axis 1, then L2 normalize).
          const { last_hidden_state } = await vision_model(image_inputs);
          const imageEmbedding = last_hidden_state
            .mean(1)
            .normalize(2, -1)
            .tolist()[0];

          // Compute similarity of the frame to every label.
          const similarities = textEmbeddings.map(
            (x) => dot(x, imageEmbedding) * exp_logit_scale,
          );
          // Pair each probability with its label index, highest first.
          const sortedIndices = softmax(similarities)
            .map((x, i) => [x, i])
            .sort((a, b) => b[0] - a[0]);

          // Update UI. Text nodes keep user-typed labels from being parsed
          // as HTML.
          overlay.innerHTML = "";
          for (const [score, index] of sortedIndices) {
            overlay.appendChild(
              document.createTextNode(`${labels[index]}: ${score.toFixed(2)}`),
            );
            overlay.appendChild(document.createElement("br"));
          }
        }

        // Sample the clock once so the displayed FPS and the stored
        // timestamp refer to the same instant.
        const now = performance.now();
        if (previousTime !== undefined) {
          const fps = 1000 / (now - previousTime);
          status.textContent = `FPS: ${fps.toFixed(2)}`;
        }
        previousTime = now;
      } catch (err) {
        // Report the failure instead of leaving an unhandled rejection.
        console.error("Frame processing failed:", err);
      } finally {
        // Always release the guard so the next frame can be processed.
        isProcessing = false;
      }
    })();
  }
  window.requestAnimationFrame(onFrameUpdate);
}