in src/pipelines.js [2162:2290]
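/**
 * Segment the input image(s).
 * (A doc-comment sketch inferred from the defaults and branches below; parameter
 * semantics follow the Hugging Face image-segmentation pipeline conventions.)
 * @param {any} images The input image(s) to segment.
 * @param {Object} options Optional parameters:
 * @param {number} [options.threshold=0.5] Probability threshold used to filter out predicted segments.
 * @param {number} [options.mask_threshold=0.5] Threshold used when binarizing the predicted masks.
 * @param {number} [options.overlap_mask_area_threshold=0.8] Overlap area threshold used to eliminate small, disconnected mask regions.
 * @param {number[]} [options.label_ids_to_fuse=null] (panoptic only) Label ids to fuse into a single segment.
 * @param {number[][]} [options.target_sizes=null] Target [height, width] sizes for the output masks; defaults to the input image sizes.
 * @param {string} [options.subtask=null] One of 'panoptic', 'instance' or 'semantic'; auto-detected from the image processor when omitted.
 * @returns {Promise<ImageSegmentationPipelineOutput[]>} The annotated segments.
 */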
async _call(images, {
    threshold = 0.5,
    mask_threshold = 0.5,
    overlap_mask_area_threshold = 0.8,
    label_ids_to_fuse = null,
    target_sizes = null,
    subtask = null,
} = {}) {
    const isBatched = Array.isArray(images);

    if (isBatched && images.length !== 1) {
        throw Error("Image segmentation pipeline currently only supports a batch size of 1.");
    }

    const preparedImages = await prepareImages(images);
    const imageSizes = preparedImages.map(x => [x.height, x.width]);

    const inputs = await this.processor(preparedImages);
    const { inputNames, outputNames } = this.model.sessions['model'];
    if (!inputNames.includes('pixel_values')) {
        if (inputNames.length !== 1) {
            throw Error(`Expected a single input name, but got ${inputNames.length} inputs: ${inputNames}.`);
        }

        const newName = inputNames[0];
        if (newName in inputs) {
            throw Error(`Input name ${newName} already exists in the inputs.`);
        }

        // To ensure compatibility with certain background-removal models,
        // we may need to remap the processor's 'pixel_values' output to the
        // name the model actually expects for its single input.
        inputs[newName] = inputs.pixel_values;
    }
    const output = await this.model(inputs);

    let fn = null;
    if (subtask !== null) {
        // Resolve the mapped method name to a post-processing function
        // bound to the image processor.
        const funcName = this.subtasks_mapping[subtask];
        fn = this.processor.image_processor[funcName].bind(this.processor.image_processor);
    } else if (this.processor.image_processor) {
        // No subtask specified: infer it from whichever post-processing
        // method the image processor implements.
        for (const [task, func] of Object.entries(this.subtasks_mapping)) {
            if (func in this.processor.image_processor) {
                fn = this.processor.image_processor[func].bind(this.processor.image_processor);
                subtask = task;
                break;
            }
        }
    }
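
    // For reference, `this.subtasks_mapping` associates each subtask with the name
    // of its post-processing method on the image processor. A sketch of its expected
    // shape (the authoritative definition lives elsewhere in this file):
    //
    //     subtasks_mapping = {
    //         panoptic: 'post_process_panoptic_segmentation',
    //         instance: 'post_process_instance_segmentation',
    //         semantic: 'post_process_semantic_segmentation',
    //     }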

    // @ts-expect-error TS2339
    const id2label = this.model.config.id2label;

    /** @type {ImageSegmentationPipelineOutput[]} */
    const annotation = [];
    if (!subtask) {
        // We define an epsilon to safeguard against numerical/precision issues when detecting
        // the normalization mode of the output (i.e., sigmoid already applied, or not).
        // See https://github.com/microsoft/onnxruntime/issues/23943 for more information.
        const epsilon = 1e-5;

        // Perform standard image segmentation: the model's first output is
        // treated directly as a per-pixel mask (e.g., for background removal).
        const result = output[outputNames[0]];
        for (let i = 0; i < imageSizes.length; ++i) {
            const size = imageSizes[i];
            const item = result[i];
            if (item.data.some(x => x < -epsilon || x > 1 + epsilon)) {
                // Values fall outside [0, 1], so the model returned raw logits;
                // normalize them in-place with a sigmoid.
                item.sigmoid_();
            }
            // `size` is [height, width], while `resize` expects (width, height).
            const mask = await RawImage.fromTensor(item.mul_(255).to('uint8')).resize(size[1], size[0]);
            annotation.push({
                label: null,
                score: null,
                mask
            });
        }
    } else if (subtask === 'panoptic' || subtask === 'instance') {
        const processed = fn(
            output,
            threshold,
            mask_threshold,
            overlap_mask_area_threshold,
            label_ids_to_fuse,
            target_sizes ?? imageSizes, // TODO FIX?
        )[0];

        const segmentation = processed.segmentation;

        for (const segment of processed.segments_info) {
            // Build a binary mask for this segment: 255 where the segmentation
            // map matches the segment id, 0 elsewhere.
            const maskData = new Uint8ClampedArray(segmentation.data.length);
            for (let i = 0; i < segmentation.data.length; ++i) {
                if (segmentation.data[i] === segment.id) {
                    maskData[i] = 255;
                }
            }

            const mask = new RawImage(maskData, segmentation.dims[1], segmentation.dims[0], 1);

            annotation.push({
                score: segment.score,
                label: id2label[segment.label_id],
                mask: mask
            });
        }
    } else if (subtask === 'semantic') {
        const { segmentation, labels } = fn(output, target_sizes ?? imageSizes)[0];

        for (const label of labels) {
            // Build a binary mask for this class label, mirroring the
            // panoptic/instance branch above.
            const maskData = new Uint8ClampedArray(segmentation.data.length);
            for (let i = 0; i < segmentation.data.length; ++i) {
                if (segmentation.data[i] === label) {
                    maskData[i] = 255;
                }
            }

            const mask = new RawImage(maskData, segmentation.dims[1], segmentation.dims[0], 1);

            annotation.push({
                score: null,
                label: id2label[label],
                mask: mask
            });
        }
    } else {
        throw Error(`Subtask ${subtask} not supported.`);
    }

    return annotation;
}
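
// Example usage (a sketch: the model id is one segmentation checkpoint commonly
// used with this pipeline, and the exact score/label values are illustrative):
//
//     const segmenter = await pipeline('image-segmentation', 'Xenova/detr-resnet-50-panoptic');
//     const output = await segmenter('image.png');
//     // [
//     //   { score: 0.99, label: 'cat', mask: RawImage { ... } },
//     //   ...
//     // ]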