# optimum/amd/ryzenai/pipelines/object_detection.py
# Copyright 2023 The HuggingFace Team. All rights reserved.
# Licensed under the MIT License.
from typing import Dict

import torch
from transformers import Pipeline
from transformers.image_utils import load_image


class YoloObjectDetectionPipeline(Pipeline):
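    """
    Object detection pipeline for RyzenAI YOLO models: predicts bounding boxes and
    class labels for the objects in an image, filters the raw predictions by a
    confidence `threshold`, and applies non-maximum suppression with `nms_threshold`.

    The pipeline assumes its paired image processor returns the original image sizes
    under `target_sizes` in its features and implements `post_process_object_detection`
    with an `nms_threshold` argument.

    Each detection is returned as a dict of the form (values illustrative):

        {"score": 0.91, "label": 7, "box": {"xmin": 12, "ymin": 34, "xmax": 56, "ymax": 78}}
    """
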
    def _sanitize_parameters(self, **kwargs):
        # Route caller kwargs to the pipeline stages; `Pipeline` expects a
        # (preprocess_params, forward_params, postprocess_params) triple.
        preprocess_params = {}
        if "timeout" in kwargs:
            preprocess_params["timeout"] = kwargs["timeout"]
        postprocess_params = {}
        if "threshold" in kwargs:
            postprocess_params["threshold"] = kwargs["threshold"]
        if "nms_threshold" in kwargs:
            postprocess_params["nms_threshold"] = kwargs["nms_threshold"]
        if "data_format" in kwargs:
            # `data_format` is needed both to lay out the image during preprocessing
            # and to decode the predictions during postprocessing.
            preprocess_params["data_format"] = kwargs["data_format"]
            postprocess_params["data_format"] = kwargs["data_format"]
        return preprocess_params, {}, postprocess_params

    def preprocess(self, image, timeout=None, data_format=None):
        image = load_image(image, timeout=timeout)
        # The image processor is expected to include the original image sizes under
        # `target_sizes` in the returned features; `_forward` relies on that key.
        image_features = self.image_processor(image, return_tensors=self.framework, data_format=data_format)
        return image_features

    def _forward(self, model_inputs):
        # `target_sizes` is not a model input: pop it before the forward pass and
        # thread it through to `postprocess`.
        target_sizes = model_inputs.pop("target_sizes")
        outputs = self.model(**model_inputs)
        model_outputs = {"target_sizes": target_sizes, **outputs}
        return model_outputs

    def postprocess(self, model_outputs, nms_threshold=0.45, threshold=0.25, data_format=None):
        target_sizes = model_outputs.pop("target_sizes")
        # Decode the raw predictions into scores/labels/boxes, dropping detections
        # below `threshold` and suppressing overlaps with `nms_threshold`. The
        # pipeline handles one image at a time, hence the `[0]`.
        outputs = self.image_processor.post_process_object_detection(
            outputs=model_outputs,
            target_sizes=target_sizes,
            threshold=threshold,
            nms_threshold=nms_threshold,
            data_format=data_format,
        )[0]
        scores = outputs["scores"].tolist()
        labels = [label.item() for label in outputs["labels"]]
        boxes = [self._get_bounding_box(box) for box in outputs["boxes"]]
        # One dict per detection: {"score": ..., "label": ..., "box": {...}}.
        keys = ["score", "label", "box"]
        results = [dict(zip(keys, vals)) for vals in zip(scores, labels, boxes)]
        return results

    def _get_bounding_box(self, box: "torch.Tensor") -> Dict[str, int]:
        """
        Turns a tensor [xmin, ymin, xmax, ymax] into the dict {"xmin": xmin, ...},
        e.g. tensor([12., 34., 56., 78.]) -> {"xmin": 12, "ymin": 34, "xmax": 56, "ymax": 78}.

        Args:
            box (`torch.Tensor`): Tensor containing the coordinates in corners format.

        Returns:
            bbox (`Dict[str, int]`): Dict containing the coordinates in corners format.
        """
        if self.framework != "pt":
            raise ValueError("The ObjectDetectionPipeline is only available in PyTorch.")
        xmin, ymin, xmax, ymax = box.int().tolist()
        bbox = {
            "xmin": xmin,
            "ymin": ymin,
            "xmax": xmax,
            "ymax": ymax,
        }
        return bbox
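

# Minimal wiring sketch, left as comments since model loading is outside this
# module: `model` must be a RyzenAI YOLO detection model and `image_processor`
# its paired processor satisfying the contract in the class docstring; the
# image path below is illustrative.
#
#     model = ...  # RyzenAI YOLO model, loaded elsewhere
#     image_processor = ...  # paired image processor
#     detector = YoloObjectDetectionPipeline(model=model, image_processor=image_processor)
#     detections = detector("street.jpg", threshold=0.25, nms_threshold=0.45)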