async _call()

in src/pipelines.js [2162:2290]


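    /**
     * Segment the input image(s). Returns one { label, score, mask } annotation
     * per segment, dispatching to semantic, instance, or panoptic post-processing
     * depending on `subtask` (or on what the image processor supports).
     */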
    async _call(images, {
        threshold = 0.5,
        mask_threshold = 0.5,
        overlap_mask_area_threshold = 0.8,
        label_ids_to_fuse = null,
        target_sizes = null,
        subtask = null,
    } = {}) {
        const isBatched = Array.isArray(images);

        if (isBatched && images.length !== 1) {
            throw Error("Image segmentation pipeline currently only supports a batch size of 1.");
        }

        const preparedImages = await prepareImages(images);
        const imageSizes = preparedImages.map(x => [x.height, x.width]);

        const inputs = await this.processor(preparedImages);

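        // Inspect the ONNX session metadata for the model's expected input and output names.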
        const { inputNames, outputNames } = this.model.sessions['model'];
        if (!inputNames.includes('pixel_values')) {
            if (inputNames.length !== 1) {
                throw Error(`Expected a single input name, but got ${inputNames.length} inputs: ${inputNames}.`);
            }

            const newName = inputNames[0];
            if (newName in inputs) {
                throw Error(`Input name ${newName} already exists in the inputs.`);
            }
            // To ensure compatibility with certain background-removal models,
            // we may need to remap the default `pixel_values` input to the
            // model's expected input name.
            inputs[newName] = inputs.pixel_values;
        }

        const output = await this.model(inputs);

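        // Resolve the post-processing function: use the explicitly requested subtask
        // if given, otherwise autodetect from the methods the image processor implements.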
        let fn = null;
        if (subtask !== null) {
            fn = this.subtasks_mapping[subtask];
        } else if (this.processor.image_processor) {
            for (const [task, func] of Object.entries(this.subtasks_mapping)) {
                if (func in this.processor.image_processor) {
                    fn = this.processor.image_processor[func].bind(this.processor.image_processor);
                    subtask = task;
                    break;
                }
            }
        }

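        // Mapping from class id to human-readable label, taken from the model config.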
        // @ts-expect-error TS2339
        const id2label = this.model.config.id2label;

        /** @type {ImageSegmentationPipelineOutput[]} */
        const annotation = [];
        if (!subtask) {
            // We define an epsilon to safeguard against numerical/precision issues when detecting
            // the normalization mode of the output (i.e., sigmoid already applied, or not).
            // See https://github.com/microsoft/onnxruntime/issues/23943 for more information.
            const epsilon = 1e-5;

            // Perform standard image segmentation
            const result = output[outputNames[0]];
            for (let i = 0; i < imageSizes.length; ++i) {
                const size = imageSizes[i];
                const item = result[i];
                if (item.data.some(x => x < -epsilon || x > 1 + epsilon)) {
                    item.sigmoid_();
                }
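                // Scale probabilities to [0, 255], cast to uint8, and resize the mask
                // back to the original image dimensions (width, height).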
                const mask = await RawImage.fromTensor(item.mul_(255).to('uint8')).resize(size[1], size[0]);
                annotation.push({
                    label: null,
                    score: null,
                    mask
                });
            }
        } else if (subtask === 'panoptic' || subtask === 'instance') {
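            // Panoptic/instance post-processing returns a single segmentation map plus
            // per-segment metadata; convert each segment into a binary (0/255) mask.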
            const processed = fn(
                output,
                threshold,
                mask_threshold,
                overlap_mask_area_threshold,
                label_ids_to_fuse,
                target_sizes ?? imageSizes, // TODO FIX?
            )[0];

            const segmentation = processed.segmentation;

            for (const segment of processed.segments_info) {
                const maskData = new Uint8ClampedArray(segmentation.data.length);
                for (let i = 0; i < segmentation.data.length; ++i) {
                    if (segmentation.data[i] === segment.id) {
                        maskData[i] = 255;
                    }
                }

                const mask = new RawImage(maskData, segmentation.dims[1], segmentation.dims[0], 1);

                annotation.push({
                    score: segment.score,
                    label: id2label[segment.label_id],
                    mask: mask
                });
            }

        } else if (subtask === 'semantic') {
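            // Semantic post-processing yields one label per pixel; emit a binary mask
            // for each distinct label (semantic masks carry no per-segment scores).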
            const { segmentation, labels } = fn(output, target_sizes ?? imageSizes)[0];

            for (const label of labels) {
                const maskData = new Uint8ClampedArray(segmentation.data.length);
                for (let i = 0; i < segmentation.data.length; ++i) {
                    if (segmentation.data[i] === label) {
                        maskData[i] = 255;
                    }
                }

                const mask = new RawImage(maskData, segmentation.dims[1], segmentation.dims[0], 1);

                annotation.push({
                    score: null,
                    label: id2label[label],
                    mask: mask
                });
            }
        } else {
            throw Error(`Subtask ${subtask} not supported.`);
        }

        return annotation;
    }
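
Example usage (a minimal sketch; the model id and image URL below are illustrative
placeholders, not taken from this source):

    import { pipeline } from '@huggingface/transformers';

    // 'image-segmentation' is the registered task name for this pipeline.
    const segmenter = await pipeline('image-segmentation', 'Xenova/face-parsing');
    const annotations = await segmenter('https://example.com/photo.jpg');
    // => [{ label, score, mask }, ...] with one entry per segment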