in florence2-webgpu/src/worker.js [85:128]
/**
 * Execute a Florence-2 task against an image and post the result back
 * to the main thread.
 *
 * @param {Object} req
 * @param {string} [req.text] - Optional extra text input for tasks that take one.
 * @param {string} req.url  - Image URL (only fetched when the vision cache is empty).
 * @param {string} req.task - Florence-2 task token, e.g. "<CAPTION>".
 */
async function run({ text, url, task }) {
  const [model, tokenizer, processor] = await Florence2Singleton.getInstance();

  const start = performance.now();

  // Lazily compute vision features and reuse the module-level cache on
  // later calls. NOTE(review): nothing here invalidates the cache when
  // `url` changes — presumably the caller clears `vision_inputs` when a
  // new image is selected; verify against the message handler.
  if (!vision_inputs) {
    const image = await RawImage.fromURL(url);
    image_size = image.size;
    vision_inputs = await processor(image);
  }

  // Tasks that accept additional input get the user text appended to
  // the task token; all others use the task token alone.
  const prompt =
    TASKS_WITH_INPUTS.includes(task) && text ? task + text : task;
  const prompts = processor.construct_prompts(prompt);
  const text_inputs = tokenizer(prompts);

  // Greedy decoding: a single beam, no sampling, at most 128 new tokens.
  const generated_ids = await model.generate({
    ...text_inputs,
    ...vision_inputs,
    max_new_tokens: 128,
    num_beams: 1,
    do_sample: false,
  });

  // Keep special tokens — the task-specific post-processor parses them.
  const [generated_text] = tokenizer.batch_decode(generated_ids, {
    skip_special_tokens: false,
  });

  const result = processor.post_process_generation(
    generated_text,
    task,
    image_size,
  );

  const end = performance.now();
  self.postMessage({ status: "complete", result, time: end - start });
}