function onFrameUpdate()

in webgpu-clip/main.js [75:153]


/**
 * Animation-frame callback that classifies the current video frame against
 * the user-supplied labels and then re-schedules itself.
 *
 * At most one classification pass runs at a time: `isProcessing` acts as a
 * re-entrancy guard, so frames that arrive while a pass is still in flight
 * are skipped (the callback is still re-scheduled).
 *
 * Each pass:
 *   1. Recomputes the text embeddings if the labels or the template changed.
 *   2. If text embeddings are available, embeds the current video frame,
 *      scores it against every label, and renders the sorted scores into
 *      `overlay`.
 *   3. Updates the FPS read-out in `status`.
 *
 * Any error raised during a pass is logged and the guard is always released,
 * so a single failing frame cannot permanently stall the loop.
 *
 * @returns {void}
 */
function onFrameUpdate() {
  if (!isProcessing) {
    isProcessing = true;
    (async () => {
      try {
        // If text inputs have changed, update the embeddings.
        if (
          prevTextInputs !== labelsInput.value ||
          prevTemplate !== templateInput.value
        ) {
          // Invalidate first so stale embeddings are never paired with the
          // new label list. The "previous" values are committed before the
          // model call, so a failed embedding is not retried until the user
          // edits one of the inputs again.
          textEmbeddings = null;
          prevTextInputs = labelsInput.value;
          prevTemplate = templateInput.value;
          // Comma-separated list; surrounding whitespace and empty entries
          // are dropped.
          labels = prevTextInputs.split(/\s*,\s*/).filter((x) => x);

          if (labels.length > 0) {
            const texts = labels.map((label) =>
              prevTemplate.replaceAll("{}", label),
            );

            const text_inputs = tokenizer(texts, {
              padding: true,
              truncation: true,
            });

            // Compute embeddings
            const { text_embeds } = await text_model(text_inputs);
            textEmbeddings = text_embeds.normalize().tolist();
          } else {
            // No labels left: clear any previously rendered scores.
            overlay.innerHTML = "";
          }
        }

        if (textEmbeddings) {
          // Read the current frame from the video
          context.drawImage(video, 0, 0, IMAGE_SIZE, IMAGE_SIZE);
          const pixelData = context.getImageData(
            0,
            0,
            IMAGE_SIZE,
            IMAGE_SIZE,
          ).data;
          const image = new RawImage(pixelData, IMAGE_SIZE, IMAGE_SIZE, 4);

          const image_inputs = await processor(image);

          // Compute embeddings
          const { image_embeds } = await vision_model(image_inputs);
          const imageEmbedding = image_embeds.normalize().tolist()[0];

          // Compute similarity: scaled dot product of each text embedding
          // with the image embedding.
          const similarities = textEmbeddings.map(
            (x) => dot(x, imageEmbedding) * exp_logit_scale,
          );

          // Pair every probability with its label index, highest score first.
          const sortedIndices = softmax(similarities)
            .map((x, i) => [x, i])
            .sort((a, b) => b[0] - a[0]);

          // Update UI. Labels are inserted as text nodes, so user input is
          // never interpreted as HTML.
          overlay.innerHTML = "";
          for (const [score, index] of sortedIndices) {
            overlay.appendChild(
              document.createTextNode(`${labels[index]}: ${score.toFixed(2)}`),
            );
            overlay.appendChild(document.createElement("br"));
          }
        }

        // FPS is measured between the ends of consecutive completed passes.
        const now = performance.now();
        if (previousTime !== undefined) {
          const fps = 1000 / (now - previousTime);
          status.textContent = `FPS: ${fps.toFixed(2)}`;
        }
        previousTime = now;
      } catch (err) {
        // Report instead of leaving an unhandled rejection on the floating
        // promise; the next frame will simply try again.
        console.error("Frame processing failed:", err);
      } finally {
        // Always release the guard, otherwise one error would block every
        // subsequent frame from being processed.
        isProcessing = false;
      }
    })();
  }

  window.requestAnimationFrame(onFrameUpdate);
}