in src/pipelines.js [2631:2697]
async _call(images, candidate_labels, {
threshold = 0.1,
top_k = null,
percentage = false,
} = {}) {
const isBatched = Array.isArray(images);
const preparedImages = await prepareImages(images);
// Run tokenization
const text_inputs = this.tokenizer(candidate_labels, {
padding: true,
truncation: true,
});
// Run processor
const model_inputs = await this.processor(preparedImages);
// Since non-maximum suppression is performed for exporting, we need to
// process each image separately. For more information, see:
// https://github.com/huggingface/optimum/blob/e3b7efb1257c011db907ef40ab340e795cc5684c/optimum/exporters/onnx/model_configs.py#L1028-L1032
const toReturn = [];
for (let i = 0; i < preparedImages.length; ++i) {
const image = preparedImages[i];
const imageSize = percentage ? null : [[image.height, image.width]];
const pixel_values = model_inputs.pixel_values[i].unsqueeze_(0);
// Run model with both text and pixel inputs
const output = await this.model({ ...text_inputs, pixel_values });
let result;
if ('post_process_grounded_object_detection' in this.processor) {
// @ts-ignore
const processed = this.processor.post_process_grounded_object_detection(
output,
text_inputs.input_ids,
{
// TODO: support separate threshold values
box_threshold: threshold,
text_threshold: threshold,
target_sizes: imageSize,
},
)[0];
result = processed.boxes.map((box, i) => ({
score: processed.scores[i],
label: processed.labels[i],
box: get_bounding_box(box, !percentage),
}))
} else {
// @ts-ignore
const processed = this.processor.image_processor.post_process_object_detection(output, threshold, imageSize, true)[0];
result = processed.boxes.map((box, i) => ({
score: processed.scores[i],
label: candidate_labels[processed.classes[i]],
box: get_bounding_box(box, !percentage),
}))
}
result.sort((a, b) => b.score - a.score);
if (top_k !== null) {
result = result.slice(0, top_k);
}
toReturn.push(result)
}
return isBatched ? toReturn : toReturn[0];
}