async _call()

in src/pipelines.js [2631:2697]


    async _call(images, candidate_labels, {
        threshold = 0.1,
        top_k = null,
        percentage = false,
    } = {}) {

        const isBatched = Array.isArray(images);
        const preparedImages = await prepareImages(images);

        // Run tokenization
        const text_inputs = this.tokenizer(candidate_labels, {
            padding: true,
            truncation: true,
        });

        // Run processor
        const model_inputs = await this.processor(preparedImages);

        // Since non-maximum suppression is performed for exporting, we need to
        // process each image separately. For more information, see:
        // https://github.com/huggingface/optimum/blob/e3b7efb1257c011db907ef40ab340e795cc5684c/optimum/exporters/onnx/model_configs.py#L1028-L1032
        const toReturn = [];
        for (let i = 0; i < preparedImages.length; ++i) {
            const image = preparedImages[i];
            const imageSize = percentage ? null : [[image.height, image.width]];
            const pixel_values = model_inputs.pixel_values[i].unsqueeze_(0);

            // Run model with both text and pixel inputs
            const output = await this.model({ ...text_inputs, pixel_values });

            let result;
            if ('post_process_grounded_object_detection' in this.processor) {
                // @ts-ignore
                const processed = this.processor.post_process_grounded_object_detection(
                    output,
                    text_inputs.input_ids,
                    {
                        // TODO: support separate threshold values
                        box_threshold: threshold,
                        text_threshold: threshold,
                        target_sizes: imageSize,
                    },
                )[0];
                result = processed.boxes.map((box, i) => ({
                    score: processed.scores[i],
                    label: processed.labels[i],
                    box: get_bounding_box(box, !percentage),
                }))
            } else {
                // @ts-ignore
                const processed = this.processor.image_processor.post_process_object_detection(output, threshold, imageSize, true)[0];
                result = processed.boxes.map((box, i) => ({
                    score: processed.scores[i],
                    label: candidate_labels[processed.classes[i]],
                    box: get_bounding_box(box, !percentage),
                }))
            }
            result.sort((a, b) => b.score - a.score);

            if (top_k !== null) {
                result = result.slice(0, top_k);
            }
            toReturn.push(result)
        }

        return isBatched ? toReturn : toReturn[0];
    }