in janus-pro-webgpu/src/worker.js [115:227]
async function generate(messages) {
  // For this demo, we only respond to the last message
  const message = messages.at(-1);

  // Tell the main thread we are starting
  self.postMessage({ status: "start" });

  // Load the pipeline
  const [processor, model] = await ImageGenerationPipeline.getInstance();
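  /*
    For context, a minimal sketch of the singleton that getInstance() above is
    assumed to implement. The real ImageGenerationPipeline class is defined
    earlier in this file; the model id and loading options below are
    assumptions for illustration, not taken from this excerpt:

      class ImageGenerationPipeline {
        static model_id = "onnx-community/Janus-Pro-1B-ONNX"; // assumed id

        static async getInstance(progress_callback = null) {
          // Cache the promises so the processor and weights load only once
          this.processor ??= AutoProcessor.from_pretrained(this.model_id, {
            progress_callback,
          });
          this.model ??= MultiModalityCausalLM.from_pretrained(this.model_id, {
            progress_callback,
          });
          return Promise.all([this.processor, this.model]);
        }
      }
  */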
  // Determine if the user wants to generate an image or text
  if (message.content.startsWith(IMAGE_GENERATION_COMMAND_PREFIX)) {
    const text = message.content.replace(IMAGE_GENERATION_COMMAND_PREFIX, "");

    const conversation = [
      {
        role: "<|User|>", // Janus role tags use title case
        content: text,
      },
    ];

    const inputs = await processor(conversation, {
      chat_template: "text_to_image",
    });
    // Stream intermediate progress updates back to the main thread
    const callback_function = (output) => {
      self.postMessage({
        status: "image-update",
        ...output,
      });
    };

    const num_image_tokens = processor.num_image_tokens;
    const streamer = new ProgressStreamer(num_image_tokens, callback_function);

    // Image generation emits a fixed number of tokens, so min and max are equal
    const outputs = await model.generate_images({
      ...inputs,
      min_new_tokens: num_image_tokens,
      max_new_tokens: num_image_tokens,
      do_sample: true,
      streamer,
    });
    const blob = await outputs[0].toBlob();

    // Send the output back to the main thread
    self.postMessage({
      status: "image-update",
      blob,
    });
  } else {
    // Build a multimodal prompt if an image is attached, otherwise a plain chat
    const inputs = await processor(
      message.image
        ? [
            {
              role: "<|User|>",
              content: "<image_placeholder>\n" + message.content,
              images: [message.image],
            },
          ]
        : [
            {
              role: "<|System|>",
              content:
                "You are a helpful assistant. Answer the user's questions in a concise manner.",
            },
            {
              role: "<|User|>",
              content: message.content,
            },
          ],
    );
    // Track generation speed in tokens per second (tps)
    let startTime;
    let numTokens = 0;
    let tps;
    const token_callback_function = () => {
      // Start the clock on the first token, then update tps on each subsequent one
      startTime ??= performance.now();
      if (numTokens++ > 0) {
        tps = (numTokens / (performance.now() - startTime)) * 1000;
      }
    };
    const callback_function = (output) => {
      self.postMessage({
        status: "text-update",
        output,
        tps,
        numTokens,
      });
    };

    const streamer = new TextStreamer(processor.tokenizer, {
      skip_prompt: true,
      skip_special_tokens: true,
      callback_function,
      token_callback_function,
    });
    // Generate response
    const outputs = await model.generate({
      ...inputs,
      max_new_tokens: MAX_NEW_TEXT_TOKENS,
      do_sample: false,
      streamer,
      stopping_criteria,
    });
  }

  // Tell the main thread we are done
  self.postMessage({
    status: "complete",
  });
}
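/*
  A minimal sketch of how generate() is assumed to be invoked, following the
  pattern of other transformers.js worker examples. The actual listener lives
  elsewhere in this file, and the message `type` value is an assumption:

    self.addEventListener("message", async (e) => {
      const { type, data } = e.data;
      if (type === "generate") {
        await generate(data);
      }
    });
*/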
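/*
  And on the main thread, a hypothetical consumer of the status messages this
  worker posts ("start", "image-update", "text-update", "complete"). The worker
  path, message `type`, and handler names are illustrative, not from this repo:

    const worker = new Worker(new URL("./worker.js", import.meta.url), {
      type: "module",
    });

    worker.postMessage({ type: "generate", data: messages });

    worker.addEventListener("message", (e) => {
      switch (e.data.status) {
        case "start":
          showSpinner(); // hypothetical UI helper
          break;
        case "image-update":
          renderImage(e.data); // progress fractions, then the final blob
          break;
        case "text-update":
          appendText(e.data.output); // streamed text plus tps/numTokens stats
          break;
        case "complete":
          hideSpinner();
          break;
      }
    });
*/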