"""Detection input and model functions for serving/inference."""
import functools
import heapq
from typing import Any, Callable, Dict, List, Optional, Text
import tensorflow as tf
from tfvision.serving import automl_constants
from object_detection.utils import label_map_util
from official.core import config_definitions as cfg
from official.projects.yolo.modeling import factory as yolo_factory
from official.projects.yolo.modeling.decoders import yolo_decoder # pylint: disable=unused-import
from official.projects.yolo.serving import model_fn as yolo_model_fn
from official.vision import configs
from official.vision.ops import box_ops
from official.vision.serving import detection as detection_module


def load_label_map_to_string_list(label_map_path: str,
                                  fill_in_gaps_and_background: bool = True
                                 ) -> List[str]:
  """Loads class labels as a string list ordered by class id.

  Args:
    label_map_path: the path to a label_map.pbtxt file in
      string_int_label_map_pb2 proto format.
    fill_in_gaps_and_background: whether to fill in gaps and background with
      respect to the id field in the proto. The id 0 is reserved for the
      'background' class and is added if missing. Any other missing id in
      range(1, max(id)) is added with a dummy class name ("class_<id>").

  Returns:
    The class labels as a list of strings, ordered by numeric class id.
  """
labelmap = label_map_util.get_label_map_dict(
label_map_path, fill_in_gaps_and_background=fill_in_gaps_and_background)
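  # Push (id, name) pairs onto a min-heap so that popping yields label names
  # sorted by ascending class id.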
heap = []
for label_name, label_id in labelmap.items():
heapq.heappush(heap, (label_id, label_name))
label_list = [heapq.heappop(heap)[1] for _ in range(len(heap))]
return label_list
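
# Example with a hypothetical labelmap containing ids {1: 'cat', 2: 'dog'}
# and fill_in_gaps_and_background=True:
#   load_label_map_to_string_list('label_map.pbtxt')
#   => ['background', 'cat', 'dog']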


class DetectionModule(detection_module.DetectionModule):
  """Detection Module."""

  def __init__(self,
params: cfg.ExperimentConfig,
*,
batch_size: int,
input_image_size: List[int],
input_type: str = automl_constants.INPUT_TYPE,
num_channels: int = 3,
model: Optional[tf.keras.Model] = None,
label_map_path: Optional[str] = None,
input_name: str = automl_constants.IOD_INPUT_NAME,
key_name: str = automl_constants.INPUT_KEY_NAME):
"""Initializes a module for export.
Args:
params: Experiment params.
batch_size: The batch size of the model input. Can be `int` or None.
input_image_size: List or Tuple of size of the input image. For 2D image,
it is [height, width].
input_type: The input signature type.
num_channels: The number of the image channels.
model: A tf.keras.Model instance to be exported.
label_map_path: A labelmap proto file path.
input_name: A customized input tensor name. This will be used as the
signature's input image argument name.
key_name: A name to the automl model input key.
"""
self._key_name = key_name
if label_map_path is not None:
self._label_map_table = self._generate_label_map_list(label_map_path)
else:
self._label_map_table = None
super().__init__(
params=params,
model=model,
batch_size=batch_size,
input_image_size=input_image_size,
input_name=input_name,
input_type=input_type)

  def _generate_label_map_list(
      self, label_map_path: str) -> tf.lookup.StaticHashTable:
    """Builds an id-to-string lookup table from a labelmap path."""
    mapping_string = tf.convert_to_tensor(
        load_label_map_to_string_list(label_map_path))
    return tf.lookup.index_to_string_table_from_tensor(
        mapping_string, default_value=automl_constants.LOOKUP_DEFAULT_VALUE)

  def _generate_class_text_output(
      self, detection_classes: tf.Tensor) -> tf.Tensor:
    """Converts class indices to class text labels."""
    if self._label_map_table is None:
      raise ValueError('_label_map_table is None.')
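    # Flatten the [batch, num_detections] class ids, look up their text
    # labels, then restore the original shape.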
indices = tf.cast(detection_classes, tf.int64)
indices = tf.reshape(indices, [-1])
values = self._label_map_table.lookup(indices)
    return tf.reshape(
        values, [-1, tf.shape(detection_classes)[1]],
        name=automl_constants.DETECTION_CLASSES_AS_TEXT)

  def serve(self,
            images: tf.Tensor,
            key: Optional[tf.Tensor] = None) -> Dict[Text, tf.Tensor]:
    """Casts images to float and runs inference.

    Args:
      images: uint8 Tensor of input images. For the image tensor input type,
        the shape is [batch_size, None, None, 3]; for image_bytes, the shape
        is [batch_size].
      key: Optional string Tensor of shape [batch_size]. If not provided, the
        output tensors will not contain it either.

    Returns:
      A dictionary of tensors holding the detection outputs.
    """
images, anchor_boxes, image_info = self.preprocess(images)
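    # image_info is a [batch, 4, 2] tensor: per image, its rows hold the
    # original size, the rescaled size, the (y, x) scale, and the offset.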
input_image_shape = image_info[:, 1, :]
    # To work around a keras.Model limitation when saving models whose layers
    # take multiple inputs, we call `model.call` here to trigger the forward
    # pass. Note that this bypasses some Keras magic that happens in
    # `__call__`.
detections = self.model.call(
images=images,
image_shape=input_image_shape,
anchor_boxes=anchor_boxes,
training=False)
if self.params.task.model.detection_generator.apply_nms:
# For RetinaNet model, apply export_config.
if isinstance(self.params.task.model, configs.retinanet.RetinaNet):
export_config = self.params.task.export_config
# Normalize detection box coordinates to [0, 1].
if export_config.output_normalized_coordinates:
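          # Dividing by the scale maps boxes back to original-image pixels;
          # normalize_boxes then divides by the original image size.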
detection_boxes = (
detections['detection_boxes'] /
tf.tile(image_info[:, 2:3, :], [1, 1, 2]))
detections['detection_boxes'] = box_ops.normalize_boxes(
detection_boxes, image_info[:, 0:1, :])
# Cast num_detections and detection_classes to float. This allows the
# model inference to work on chain (go/chain) as chain requires floating
# point outputs.
if export_config.cast_num_detections_to_float:
detections['num_detections'] = tf.cast(
detections['num_detections'], dtype=tf.float32)
if export_config.cast_detection_classes_to_float:
detections['detection_classes'] = tf.cast(
detections['detection_classes'], dtype=tf.float32)
final_outputs = {
'detection_boxes': detections['detection_boxes'],
'detection_scores': detections['detection_scores'],
'detection_classes': detections['detection_classes'],
'num_detections': detections['num_detections']
}
else:
final_outputs = {
'decoded_boxes': detections['decoded_boxes'],
'decoded_box_scores': detections['decoded_box_scores']
}
    if 'detection_masks' in detections:
final_outputs['detection_masks'] = detections['detection_masks']
# Adding AutoML specific outputs.
if self._label_map_table is not None:
final_outputs.update({
automl_constants.DETECTION_CLASSES_AS_TEXT:
self._generate_class_text_output(detections['detection_classes'])
})
final_outputs.update({'image_info': image_info})
if key is not None:
final_outputs.update({automl_constants.OUTPUT_KEY_NAME: key})
return final_outputs

  @tf.function
def inference_from_image_bytes(
self,
inputs: tf.Tensor,
key: tf.Tensor,
) -> Dict[Text, tf.Tensor]:
"""Entry point for model input.
Raw image tensor will be decoded to the desired image format.
Args:
inputs: Image tensor to be feed to the model.
key: AutoML specific input key to track image names or image ids.
Returns:
A dictionary of Tensor that contains model outputs.
"""
with tf.device('cpu:0'):
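      # Decode each serialized image string on CPU into a uint8 tensor of
      # shape [height, width, num_channels]; sizes may differ across images.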
images = tf.nest.map_structure(
tf.identity,
tf.map_fn(
self._decode_image,
elems=inputs,
fn_output_signature=tf.TensorSpec(
shape=[None] * len(self._input_image_size) +
[self._num_channels],
dtype=tf.uint8),
parallel_iterations=32))
images = tf.stack(images)
return self.serve(images, key)

  @tf.function
def inference_from_image_bytes_wo_key(
self, inputs: tf.Tensor) -> Dict[Text, tf.Tensor]:
"""Entry point for model inference without input key tensor.
Raw image tensor will be decoded to the desired image format.
Args:
inputs: Image tensor to be feed to the model.
Returns:
A dictionary of Tensor that contains model outputs.
"""
with tf.device('cpu:0'):
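      # Same CPU-side decoding as in `inference_from_image_bytes`.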
images = tf.nest.map_structure(
tf.identity,
tf.map_fn(
self._decode_image,
elems=inputs,
fn_output_signature=tf.TensorSpec(
shape=[None] * len(self._input_image_size) +
[self._num_channels],
dtype=tf.uint8),
parallel_iterations=32))
images = tf.stack(images)
return self.serve(images)

  def get_inference_signatures(
      self, function_keys: Dict[Text, Text]
  ) -> Dict[Text, Callable[[tf.Tensor, tf.Tensor], Dict[Text, tf.Tensor]]]:
    """Gets defined function signatures.

    Args:
      function_keys: A dictionary mapping each function to create a signature
        for to the signature key under which it is exported.

    Returns:
      A dictionary with signature keys as keys, and concrete functions that
      can be used with tf.saved_model.save as values.
    """
signatures = {}
for key, def_name in function_keys.items():
# Adds input string 'key' to image_bytes input type.
if key == automl_constants.INPUT_TYPE:
input_images = tf.TensorSpec(
shape=[self._batch_size], dtype=tf.string, name=self._input_name)
input_key = tf.TensorSpec(
shape=[self._batch_size], dtype=tf.string, name=self._key_name)
signatures[
def_name] = self.inference_from_image_bytes.get_concrete_function(
input_images, input_key)
# For each input type, create a signature without input key tensor.
def_name_wo_key = def_name + automl_constants.NO_KEY_SIG_DEF_SUFFIX
signatures[def_name_wo_key] = (
self.inference_from_image_bytes_wo_key.get_concrete_function(
input_images))
else:
        raise ValueError(f'Unrecognized `input_type`: {key}')
return signatures


class YoloDetectionModule(DetectionModule):
  """Yolo detection module for Model Garden."""

def __init__(
self,
params: cfg.ExperimentConfig,
*,
batch_size: int,
input_image_size: List[int],
preprocessor: Callable[..., Any],
inference_step: Callable[..., Any],
input_type: str = automl_constants.INPUT_TYPE,
num_channels: int = 3,
model: Optional[tf.keras.Model] = None,
label_map_path: Optional[str] = None,
input_name: str = automl_constants.IOD_INPUT_NAME,
key_name: str = automl_constants.INPUT_KEY_NAME,
):
"""Initializes a module for export.
Args:
params: Experiment params.
batch_size: The batch size of the model input. Can be `int` or None.
input_image_size: List or Tuple of size of the input image. For 2D image,
it is [height, width].
preprocessor: An optional callable to preprocess the inputs.
inference_step: An optional callable to forward-pass the model.
input_type: The input signature type.
num_channels: The number of the image channels.
model: A tf.keras.Model instance to be exported.
label_map_path: A labelmap proto file path.
input_name: A customized input tensor name. This will be used as the
signature's input image argument name.
key_name: A name to the automl model input key.
"""
super().__init__(
params=params,
batch_size=batch_size,
input_image_size=input_image_size,
input_type=input_type,
num_channels=num_channels,
model=model,
label_map_path=label_map_path,
input_name=input_name,
key_name=key_name,
)
self.preprocessor = preprocessor
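    # Bind the built model so `inference_step` only needs the inputs tuple.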
self.inference_step = functools.partial(inference_step, model=self.model)

  def preprocess(self, images: tf.Tensor) -> None:
    raise NotImplementedError('Use self.preprocessor instead.')

  def serve(
      self, images: tf.Tensor, key: Optional[tf.Tensor] = None
  ) -> Dict[Text, tf.Tensor]:
    """Casts images to float and runs inference.

    Args:
      images: uint8 Tensor of input images. For the image tensor input type,
        the shape is [batch_size, None, None, 3]; for image_bytes, the shape
        is [batch_size].
      key: Optional string Tensor of shape [batch_size]. If not provided, the
        output tensors will not contain it either.

    Returns:
      A dictionary of tensors holding the detection outputs.
    """
images, image_info = self.preprocessor(images)
final_outputs = self.inference_step((images, image_info))
# Normalize detection box coordinates to [0, 1].
detection_boxes = final_outputs['detection_boxes'] / tf.tile(
image_info[:, 2:3, :], [1, 1, 2]
)
final_outputs['detection_boxes'] = box_ops.normalize_boxes(
detection_boxes, image_info[:, 0:1, :]
)
# Cast num_detections and detection_classes to float. This allows the
# model inference to work on chain (go/chain) as chain requires floating
# point outputs.
final_outputs['num_detections'] = tf.cast(
final_outputs['num_detections'], dtype=tf.float32
)
final_outputs['detection_classes'] = tf.cast(
final_outputs['detection_classes'], dtype=tf.float32
)
# Adding AutoML specific outputs.
if self._label_map_table is not None:
final_outputs.update(
{
automl_constants.DETECTION_CLASSES_AS_TEXT: (
self._generate_class_text_output(
final_outputs['detection_classes']
)
)
}
)
final_outputs.update({'image_info': image_info})
if key is not None:
final_outputs.update({automl_constants.OUTPUT_KEY_NAME: key})
return final_outputs


def create_yolov7_export_module(
    params: cfg.ExperimentConfig,
    input_type: str,
    batch_size: int,
    input_image_size: List[int],
    num_channels: int = 3,
    input_name: Optional[str] = None,
    label_map_path: Optional[str] = None,
) -> YoloDetectionModule:
  """Creates a YOLO export module for Model Garden."""
input_specs = tf.keras.layers.InputSpec(
shape=[batch_size] + input_image_size + [num_channels]
)
model = yolo_factory.build_yolov7(
input_specs=input_specs,
model_config=params.task.model,
l2_regularization=None,
)

  def preprocess_fn(image_tensor):

    def normalize_image_fn(inputs):
image = tf.cast(inputs, dtype=tf.float32)
return image / 255.0
# If input_type is `tflite`, do not apply image preprocessing. Only apply
# normalization.
if input_type == 'tflite':
return normalize_image_fn(image_tensor), None

    def preprocess_image_fn(inputs):
image = normalize_image_fn(inputs)
(image, image_info) = yolo_model_fn.letterbox(
image,
input_image_size,
letter_box=params.task.validation_data.parser.letter_box,
)
return image, image_info
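
    # Letterbox each image independently; tf.map_fn yields, per element, a
    # resized image plus a [4, 2] image_info tensor.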
    images_spec = tf.TensorSpec(
        shape=input_image_size + [num_channels], dtype=tf.float32)
    image_info_spec = tf.TensorSpec(shape=[4, 2], dtype=tf.float32)
images, image_info = tf.nest.map_structure(
tf.identity,
tf.map_fn(
preprocess_image_fn,
elems=image_tensor,
fn_output_signature=(images_spec, image_info_spec),
parallel_iterations=32,
),
)
return images, image_info

  def inference_steps(inputs, model):
    images, image_info = inputs
detection = model.call(images, training=False)
if input_type != 'tflite':
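      # Map box coordinates from the letterboxed frame back to the original
      # image frame.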
detection['bbox'] = yolo_model_fn.undo_info(
detection['bbox'],
detection['num_detections'],
image_info,
expand=False,
)
final_outputs = {
'detection_boxes': detection['bbox'],
'detection_scores': detection['confidence'],
'detection_classes': detection['classes'],
'num_detections': detection['num_detections'],
}
return final_outputs

  export_module = YoloDetectionModule(
params=params,
model=model,
batch_size=batch_size,
input_image_size=input_image_size,
input_type=input_type,
num_channels=num_channels,
input_name=input_name,
label_map_path=label_map_path,
preprocessor=preprocess_fn,
inference_step=inference_steps,
)
return export_module
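

# Example export flow (hypothetical paths and params):
#   module = create_yolov7_export_module(
#       params, input_type=automl_constants.INPUT_TYPE, batch_size=1,
#       input_image_size=[640, 640], label_map_path='label_map.pbtxt')
#   signatures = module.get_inference_signatures(
#       {automl_constants.INPUT_TYPE: 'serving_default'})
#   tf.saved_model.save(module, '/tmp/yolov7_saved_model',
#                       signatures=signatures)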