optimum/amd/ryzenai/models/detection_utils.py (47 lines of code) (raw):
# Copyright 2023 The HuggingFace Team. All rights reserved.
# Licensed under the MIT License.
import torch
import torchvision
from transformers.image_transforms import center_to_corners_format
def non_max_suppression(
predictions,
confidence_threshold=0.25,
iou_threshold=0.45,
class_conf_start_index=5,
agnostic=False,
max_detections=300,
):
outputs = []
for image_idx, _ in enumerate(predictions):
prediction = predictions[image_idx]
if class_conf_start_index == 5:
scores = prediction[:, 4:5] * prediction[:, 5:]
else:
scores = prediction[:, class_conf_start_index:]
boxes = center_to_corners_format(prediction[:, :4])
scores, idxs = scores.max(1)
valid_mask = scores > confidence_threshold
boxes = boxes[valid_mask]
scores = scores[valid_mask]
idxs = idxs[valid_mask]
if boxes.shape[0] == 0:
outputs.append(torch.empty((0, 6)))
continue
scores, sorted_indices = scores.sort(descending=True)
boxes = boxes[sorted_indices]
idxs = idxs[sorted_indices]
nms_indices = torch.zeros_like(idxs) if agnostic else idxs
kept_classes = torchvision.ops.batched_nms(boxes, scores, nms_indices, iou_threshold)[:max_detections]
output = torch.cat([boxes[kept_classes], scores[kept_classes][:, None], idxs[kept_classes][:, None]], dim=1)
outputs.append(output)
return outputs
def scale_coords(current_shape, target_shape, coords):
scaling_ratio = max(target_shape[0] / current_shape[0], target_shape[1] / current_shape[1])
padding_height, padding_width = [
(current - target / scaling_ratio) / 2 for target, current in zip(target_shape, current_shape)
]
coords[:, [0, 2]] = (coords[:, [0, 2]] - padding_width) * scaling_ratio
coords[:, [1, 3]] = (coords[:, [1, 3]] - padding_height) * scaling_ratio
coords[:, 0].clamp_(0, target_shape[1])
coords[:, 1].clamp_(0, target_shape[0])
coords[:, 2].clamp_(0, target_shape[1])
coords[:, 3].clamp_(0, target_shape[0])
return coords