optimum/neuron/models/yolos/model.py (47 lines of code) (raw):
# coding=utf-8
# Copyright 2025 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""YOLOS model on Neuron devices."""
import logging
from typing import Optional
import torch
from transformers import AutoModelForObjectDetection
from transformers.modeling_outputs import ModelOutput
from ...modeling_traced import NeuronTracedModel
from ...utils.doc import (
_PROCESSOR_FOR_IMAGE,
NEURON_IMAGE_INPUTS_DOCSTRING,
NEURON_MODEL_START_DOCSTRING,
NEURON_OBJECT_DETECTION_EXAMPLE,
add_start_docstrings,
add_start_docstrings_to_model_forward,
)
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
@add_start_docstrings(
    "\nNeuron Model with object detection heads on top, for tasks such as COCO detection.\n",
    NEURON_MODEL_START_DOCSTRING,
)
class NeuronYolosForObjectDetection(NeuronTracedModel):
    # YOLOS object-detection wrapper built on `NeuronTracedModel`.
    # The user-facing description is passed to `add_start_docstrings` above, so notes for
    # maintainers are kept as comments here rather than as an extra class docstring.

    # Auto class associated with this wrapper.
    auto_model_class = AutoModelForObjectDetection

    @property
    def dtype(self) -> Optional["torch.dtype"]:
        """
        Return the torch dtype of the model inputs.

        The value is looked up as the `input_dtype` attribute of `self.config.neuron`; when that
        attribute is missing, `torch.float32` is returned. Exposing a concrete dtype avoids an error
        in transformers when a `BatchFeature` would otherwise be cast to type `None`.
        """
        neuron_config = self.config.neuron
        return getattr(neuron_config, "input_dtype", torch.float32)

    @add_start_docstrings_to_model_forward(
        NEURON_IMAGE_INPUTS_DOCSTRING.format("batch_size, num_channels, height, width")
        + NEURON_OBJECT_DETECTION_EXAMPLE.format(
            processor_class=_PROCESSOR_FOR_IMAGE,
            model_class="NeuronYolosForObjectDetection",
            checkpoint="optimum/yolos-tiny-neuronx-bs1",
        )
    )
    def forward(
        self,
        pixel_values: torch.Tensor,
        **kwargs,
    ):
        # `pixel_values` is the only input handed to the traced model; any additional keyword
        # arguments are accepted but not used in this method.
        model_feed = {"pixel_values": pixel_values}

        # Run inference on the inputs yielded by the padding manager.
        with self.neuron_padding_manager(model_feed) as padded_inputs:
            raw_outputs = self.model(*padded_inputs)
            # Remove padding on the batch dimension (dim 0), based on the batch size of the
            # `pixel_values` actually supplied by the caller.
            real_batch_size = pixel_values.shape[0]
            trimmed_outputs = self.remove_padding(raw_outputs, dims=[0], indices=[real_batch_size])

        # The model result is consumed positionally: items 0, 1 and 2 are exposed as
        # `logits`, `pred_boxes` and `last_hidden_state` respectively.
        return ModelOutput(
            logits=trimmed_outputs[0],
            pred_boxes=trimmed_outputs[1],
            last_hidden_state=trimmed_outputs[2],
        )