in realtime-whisper-webgpu/src/worker.js [45:101]
async function generate({ audio, language }) {
  if (processing) return;
  processing = true;

  // Tell the main thread we are starting
  self.postMessage({ status: "start" });

  // Retrieve the speech-recognition pipeline (tokenizer, processor, model).
  const [tokenizer, processor, model] =
    await AutomaticSpeechRecognitionPipeline.getInstance();

  // Track decoding speed in tokens per second (tps).
  let startTime;
  let numTokens = 0;
  let tps;
  const token_callback_function = () => {
    startTime ??= performance.now();
    if (numTokens++ > 0) {
      tps = (numTokens / (performance.now() - startTime)) * 1000;
    }
  };

  // Forward partial output to the main thread as tokens are generated.
  const callback_function = (output) => {
    self.postMessage({
      status: "update",
      output,
      tps,
      numTokens,
    });
  };

  const streamer = new TextStreamer(tokenizer, {
    skip_prompt: true,
    skip_special_tokens: true,
    callback_function,
    token_callback_function,
  });

  // Pre-process the raw audio into model inputs, then run generation.
  const inputs = await processor(audio);
  const outputs = await model.generate({
    ...inputs,
    max_new_tokens: MAX_NEW_TOKENS,
    language,
    streamer,
  });

  const decoded = tokenizer.batch_decode(outputs, {
    skip_special_tokens: true,
  });

  // Send the output back to the main thread
  self.postMessage({
    status: "complete",
    output: decoded,
  });
  processing = false;
}
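
For context, a minimal sketch of how the main thread might consume these messages is shown below. Only the "start"/"update"/"complete" statuses and their payload fields (output, tps, numTokens) come from the function above; the worker construction, the { type: "generate", data } request shape, and the placeholder audio buffer are assumptions for illustration, not the app's actual UI code.

// Sketch of a main-thread consumer (assumed wiring).
const worker = new Worker(new URL("./worker.js", import.meta.url), {
  type: "module",
});

worker.addEventListener("message", (e) => {
  const { status, output, tps, numTokens } = e.data;
  if (status === "start") {
    // Generation has begun; e.g. show a "transcribing..." indicator.
  } else if (status === "update") {
    // Streaming partial text plus tokens-per-second statistics.
    console.log(output, tps, numTokens);
  } else if (status === "complete") {
    // Final transcript: an array of strings from tokenizer.batch_decode.
    console.log(output);
  }
});

// Placeholder input: 1 second of 16 kHz mono audio (assumed request shape).
const audio = new Float32Array(16000);
worker.postMessage({ type: "generate", data: { audio, language: "en" } });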